# Hyper parameter search for TD3+BC

In [4]:
!pip install d3rlpy



In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# plt.style.use('matplotlibrc')

# from Python.data_sampler import *

## Building an MDPDataset

We first read a large batch of samples from the data file and arrange them in the form `d3rlpy` expects: (observations, actions, rewards, terminal flags). The helper function below builds such a dataset from a list of chunks of your choosing.

In [6]:
import numpy as np
import torch
import random
import pandas
from Python.data_sampler import *

In [7]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt",
                episode_length=1111) -> tuple:
    """Build a d3rlpy MDPDataset from the selected chunks of a data file.

    For every entry of ``chunks`` the sampler is pointed at that chunk, the
    chunk is read, and one batch is drawn from it; the per-chunk batches are
    concatenated in the order given.

    Note: this function calls ``random.seed(0)``, i.e. it resets Python's
    global ``random`` state on every call.

    Args:
        chunks: Chunk indices, each passed to ``DataSampler.use_chunk``.
        batch_size: Value passed to ``DataSampler.get_batch`` for every chunk.
        path: Location of the data file handed to ``DataSampler``.
        episode_length: Stride of the terminal flags; every
            ``episode_length``-th sample, starting at index 0, is flagged as
            terminal. Defaults to 1111, the value previously hard-coded here.

    Returns:
        A tuple ``(dataset, states, actions, rewards)`` where ``dataset`` is
        the ``d3rlpy.dataset.MDPDataset`` and the remaining entries are the
        NumPy arrays it was built from.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")
    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch yields four items; the next-state batch is not needed
        # because MDPDataset is built from (observations, actions, rewards,
        # terminals) only.
        states_chunk, actions_chunk, rewards_chunk, _next_states_chunk = samples.get_batch(batch_size)
        states.append(states_chunk)
        actions.append(actions_chunk)
        rewards.append(rewards_chunk)
    if not states:
        raise ValueError("chunks must contain at least one chunk index")
    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)
    terminals = np.zeros(len(states))
    # NOTE(review): the recorded run printed torch.Size([111000, 6]) for 100
    # chunks (1110 samples per chunk), which does not line up with a stride of
    # 1111, and index 0 is flagged as terminal too - confirm the intended
    # episode boundaries and pass episode_length accordingly.
    terminals[::episode_length] = 1
    print(states.shape)
    # Convert each tensor once and hand the same arrays to both the dataset
    # and the caller.
    states_np = states.numpy()
    actions_np = actions.numpy()
    rewards_np = rewards.numpy()
    dataset = d3rlpy.dataset.MDPDataset(states_np, 
                                        actions_np, 
                                        rewards_np, terminals)
    return dataset, states_np, actions_np, rewards_np

We can build the dataset from there, just like this, and split into train and test sets.

In [8]:
# Build the training dataset from chunks 0-99 of rl_purestochastic.txt.
dataset, states, actions, rewards = get_dataset(
    list(range(100)),
    path="../collected_data/rl_purestochastic.txt",
)

start
[ 0.00000000e+00  7.95731469e+08 -4.75891077e-02 -3.69999953e-02
  2.00999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.50429671e-01 -4.92727243e-01 -5.31666025e-03]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25610892e-01 -3.35999953e-02
 -2.42000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.08749986e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.90489108e-01 -5.87999953e-02
 -1.01000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.76979602e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.48010892e-01  4.64000047e-02
 -1.04000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.83151637e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.08389108e-01  3.32000047e-02
 -2.02000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.25137655

[ 0.00000000e+00  7.95731469e+08  3.09110892e-01  5.60000047e-02
 -7.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.36295348e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 45 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.95108923e-02  3.40000047e-02
  1.95999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  3.06837024e-02 -6.00000000e-01]
Read chunk # 46 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.56189108e-01  5.80000047e-02
  1.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.51481072e-01 -2.88867043e-01  6.00000000e-01]
Read chunk # 47 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  5.68000047e-02
  7.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.54213589e-03  3.91473614e-01 -9.17637410e-02]
Read chunk # 48 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.60389108e-01  1.12000047e-02
  1.94999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.68637874e-

[ 0.00000000e+00  7.95731469e+08 -7.15891077e-02 -7.99999531e-03
  2.33999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.79765872e-01 -3.17402568e-01  6.00000000e-01]
Read chunk # 97 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.09789108e-01 -4.57999953e-02
 -2.20000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.84885686e-01 -2.78683294e-01  6.00000000e-01]
Read chunk # 98 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.58389108e-01  5.40000469e-03
 -2.15000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.93350106e-01  6.00000000e-01]
Read chunk # 99 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.20589108e-01  1.38000047e-02
 -2.59000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 100 out of 4999
torch.Size([111000, 6])


In [9]:
# Display the 'return' entry of the dataset statistics (mean, std, min, max and a
# histogram, as shown in the cell output) for the data the policy will be trained on.
print("The behavior policy value statistics are:")
dataset.compute_stats()['return']

The behavior policy value statistics are:


{'mean': -136.20181,
 'std': 106.013954,
 'min': -410.7058,
 'max': 0.0,
 'histogram': (array([ 4,  1,  4,  2,  1,  0,  3,  2,  5,  2,  0,  2,  6,  5,  8, 13, 12,
         28,  1,  1]),
  array([-410.7058  , -390.17053 , -369.63522 , -349.09995 , -328.56464 ,
         -308.02936 , -287.49408 , -266.95877 , -246.4235  , -225.8882  ,
         -205.3529  , -184.81761 , -164.28232 , -143.74704 , -123.21175 ,
         -102.67645 ,  -82.14116 ,  -61.605873,  -41.07058 ,  -20.53529 ,
            0.      ], dtype=float32))}

In [10]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the dataset for evaluation. A fixed random_state keeps the split
# identical across kernel restarts, so metrics from different runs are comparable.
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, random_state=0)

## Setting up an Algorithm

In [11]:
from d3rlpy.algos import CQL, TD3PlusBC
from d3rlpy.models.encoders import VectorEncoderFactory

from d3rlpy.preprocessing import MinMaxActionScaler
import random
# Min-max action scaler with explicit bounds of -0.6 and 0.6; the same instance is
# passed to the TD3+BC model and to both FQE evaluators in the search loop.
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer
import pickle as pkl
from statistics import harmonic_mean as hm

## Random search over hyperparameters

Good performance during the evaluation step in the simulator requires a good FQE (fitted Q evaluation) value estimate on both the stochastic and the deterministic data. We therefore score each hyperparameter candidate by the harmonic mean of the two FQE estimates (each shifted by a constant offset of 50 in the code below). The harmonic mean is dominated by the smallest of its inputs, so it is large only when both estimates are large.


In [12]:

# Number of random-search iterations. Every iteration draws a fresh set of
# hyper-parameters, trains TD3+BC with them and scores the result with FQE.
num_search_iterations = 40
largest_fqe = -np.inf

# Constant added to each FQE average value before the harmonic mean is taken.
fqe_value_offset = 50

# Encoder factories are loop-invariant, so they are built once. They are currently
# NOT used by the model: the corresponding keyword arguments are commented out below.
actor_encoder = VectorEncoderFactory(hidden_units=[12, 24, 36, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)
critic_encoder = VectorEncoderFactory(hidden_units=[12, 24, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)

for i in range(num_search_iterations):

    # get_dataset() calls random.seed(0), so the generator is re-seeded here;
    # otherwise every iteration after the first would draw the same values.
    random.seed()

    actor_lr_this_iter = random.uniform(1e-5, 1e-2)
    critic_lr_this_iter = random.uniform(1e-5, 1e-2)
    # Not consumed by TD3PlusBC below; still drawn and stored so the saved
    # hyper-parameter list keeps its four-entry layout.
    temp_lr_this_iter = random.uniform(1e-5, 1e-4)
    n_steps_this_iter = random.choice([1, 3, 5, 7])

    print("search iteration: ", i)
    print("using hyper params: ", [actor_lr_this_iter, critic_lr_this_iter, 
                                   temp_lr_this_iter, n_steps_this_iter])

    model = TD3PlusBC(q_func_factory='qr', #qr -> quantile regression q function, but you don't have to use this
                reward_scaler='standard',
#                 actor_encoder_factory = actor_encoder,
#                 critic_encoder_factory = critic_encoder,
                action_scaler=action_scaler,
                actor_learning_rate=actor_lr_this_iter, 
                critic_learning_rate=critic_lr_this_iter,
                n_steps=n_steps_this_iter, 
                use_gpu=True) # set to False if no GPU is available
    model.build_with_dataset(dataset)

    model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=50, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })

    # --- FQE on chunks 100-119 of rl_stochpid.txt ---
    ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset(list(range(100, 120)), 
                                                                    path="../collected_data/rl_stochpid.txt") #change if you'd prefer different chunks
    # Fixed random_state: every candidate is scored on the same held-out split.
    ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2, random_state=0)

    fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=True) # set use_gpu=False if no GPU is available
    history_stoch = fqe.fit(ope_train_episodes,
        eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

    # --- FQE on chunks 200-239 of rl_purestochastic.txt ---
    # NOTE(review): the result is stored as `history_det`, yet the data file is
    # rl_purestochastic.txt - confirm whether a deterministic data file was intended.
    ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset(list(range(200, 240)), 
                                                                    path="../collected_data/rl_purestochastic.txt") #change if you'd prefer different chunks
    ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2, random_state=0)

    fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=True) # set use_gpu=False if no GPU is available
    history_det = fqe.fit(ope_train_episodes,
        eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

    # Score of this candidate: harmonic mean of the shifted 'ave_value' taken from
    # the last entry of each FQE history (index 1 of an entry holds the metrics).
    shifted_values = [history_stoch[-1][1]["ave_value"] + fqe_value_offset, 
                      history_det[-1][1]["ave_value"] + fqe_value_offset]

    # statistics.harmonic_mean raises StatisticsError for negative inputs. Skip such
    # a candidate instead of letting one bad run abort the whole search.
    if any(value < 0 for value in shifted_values):
        print("skipping candidate, shifted FQE values are negative: ", shifted_values)
        continue

    fqe_score = hm(shifted_values)
    if fqe_score > largest_fqe:
        largest_fqe = fqe_score

        # Save the hyper-params
        hyperparams = [actor_lr_this_iter, critic_lr_this_iter, 
                       temp_lr_this_iter, n_steps_this_iter]

        with open("hyperparams_td3bc.pkl", "wb") as f:
            print("most optimal hyper params for td3+bc at this point: ", hyperparams)
            pkl.dump(hyperparams, f)

        # Save model and policy
        model.save_model("model_hyperparams_td3bc.pt")
        model.save_policy("policy_hyperparams_td3bc.pt")


search iteration:  0
using hyper params:  [0.0037935400354651636, 0.004050689617073905, 2.8020594664729005e-05, 7]
2022-04-20 15:25.50 [debug    ] RoundIterator is selected.
2022-04-20 15:25.50 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420152550
2022-04-20 15:25.50 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:25.50 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:25.50 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:25.50 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0037935400354651636, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0,

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.53 [info     ] TD3PlusBC_20220420152550: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0002855327394273546, 'time_algorithm_update': 0.008629117095679567, 'critic_loss': 12.349660272138161, 'actor_loss': 2.666534257911102, 'time_step': 0.008966750568813749, 'td_error': 1.0774244199433352, 'init_value': -11.435041427612305, 'ave_value': -7.221264671509129} step=342
2022-04-20 15:25.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.57 [info     ] TD3PlusBC_20220420152550: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00029121923167803136, 'time_algorithm_update': 0.008063409063551161, 'critic_loss': 6.0128394013259845, 'actor_loss': 2.5793802933386196, 'time_step': 0.008403367466396756, 'td_error': 1.3118598827780752, 'init_value': -16.009353637695312, 'ave_value': -10.190275628280297} step=684
2022-04-20 15:25.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.00 [info     ] TD3PlusBC_20220420152550: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00028108783632691144, 'time_algorithm_update': 0.007070833479451854, 'critic_loss': 9.38385568027608, 'actor_loss': 2.5689998872098867, 'time_step': 0.007398944849159286, 'td_error': 1.7343754173981019, 'init_value': -21.159229278564453, 'ave_value': -13.50612156532579} step=1026
2022-04-20 15:26.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.03 [info     ] TD3PlusBC_20220420152550: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00028685868134972645, 'time_algorithm_update': 0.007559127974928471, 'critic_loss': 13.353212601957265, 'actor_loss': 2.563190736268696, 'time_step': 0.007891153731541327, 'td_error': 2.2020081494904487, 'init_value': -25.85178565979004, 'ave_value': -16.78919452601339} step=1368
2022-04-20 15:26.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.06 [info     ] TD3PlusBC_20220420152550: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0002760050589578194, 'time_algorithm_update': 0.007455919918261077, 'critic_loss': 17.844333456273663, 'actor_loss': 2.562250430123848, 'time_step': 0.007773358919467145, 'td_error': 2.7492591437074156, 'init_value': -30.522876739501953, 'ave_value': -19.814133664353893} step=1710
2022-04-20 15:26.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.09 [info     ] TD3PlusBC_20220420152550: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0002858652706034699, 'time_algorithm_update': 0.007368538811890006, 'critic_loss': 22.55894979119998, 'actor_loss': 2.5616539207815427, 'time_step': 0.0076990280932153175, 'td_error': 3.4401152745126486, 'init_value': -35.39461135864258, 'ave_value': -23.097759383215678} step=2052
2022-04-20 15:26.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.12 [info     ] TD3PlusBC_20220420152550: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00027046775260166814, 'time_algorithm_update': 0.007647501097785102, 'critic_loss': 27.607440220682246, 'actor_loss': 2.5612129732879283, 'time_step': 0.007961552742629023, 'td_error': 3.927930253618221, 'init_value': -40.12720489501953, 'ave_value': -26.047581780161458} step=2394
2022-04-20 15:26.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.15 [info     ] TD3PlusBC_20220420152550: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00027462543799863223, 'time_algorithm_update': 0.007321541769462719, 'critic_loss': 33.12839552951835, 'actor_loss': 2.5601116411867197, 'time_step': 0.0076407612415782195, 'td_error': 4.493296979672868, 'init_value': -43.500823974609375, 'ave_value': -28.75497340274832} step=2736
2022-04-20 15:26.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.19 [info     ] TD3PlusBC_20220420152550: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00028695000542534725, 'time_algorithm_update': 0.00761304821884423, 'critic_loss': 39.20940800159298, 'actor_loss': 2.5592440638625833, 'time_step': 0.007946001855950607, 'td_error': 5.051930452402, 'init_value': -47.910057067871094, 'ave_value': -31.56609492460123} step=3078
2022-04-20 15:26.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.22 [info     ] TD3PlusBC_20220420152550: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00027408376771804186, 'time_algorithm_update': 0.007038759209259212, 'critic_loss': 45.14573120652584, 'actor_loss': 2.5584837121573107, 'time_step': 0.0073583858054980895, 'td_error': 5.475348606396234, 'init_value': -51.14247512817383, 'ave_value': -33.750910224692966} step=3420
2022-04-20 15:26.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.25 [info     ] TD3PlusBC_20220420152550: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00027512179480658635, 'time_algorithm_update': 0.007448394396151715, 'critic_loss': 51.05505655522932, 'actor_loss': 2.5578388930761324, 'time_step': 0.0077708820153398125, 'td_error': 6.013285483234407, 'init_value': -54.58618927001953, 'ave_value': -36.32536948971433} step=3762
2022-04-20 15:26.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.28 [info     ] TD3PlusBC_20220420152550: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0002820986753318742, 'time_algorithm_update': 0.007641071464583191, 'critic_loss': 57.47662287148816, 'actor_loss': 2.557812194378055, 'time_step': 0.007968688568873712, 'td_error': 6.541695478179664, 'init_value': -57.6851806640625, 'ave_value': -38.462030443841556} step=4104
2022-04-20 15:26.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.31 [info     ] TD3PlusBC_20220420152550: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00029050258168003014, 'time_algorithm_update': 0.00785882640303227, 'critic_loss': 63.47689774719595, 'actor_loss': 2.5575104679977683, 'time_step': 0.00819538838682119, 'td_error': 7.068259848128346, 'init_value': -60.818389892578125, 'ave_value': -40.61784414272957} step=4446
2022-04-20 15:26.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.34 [info     ] TD3PlusBC_20220420152550: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0002682780661778143, 'time_algorithm_update': 0.007401003475077668, 'critic_loss': 69.6208781303718, 'actor_loss': 2.55657126192461, 'time_step': 0.0077119633468271, 'td_error': 7.58091543164487, 'init_value': -62.997764587402344, 'ave_value': -42.54639242603805} step=4788
2022-04-20 15:26.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.37 [info     ] TD3PlusBC_20220420152550: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00025998360929433366, 'time_algorithm_update': 0.006873316235012478, 'critic_loss': 75.68763942606965, 'actor_loss': 2.55717672939189, 'time_step': 0.007173904898570992, 'td_error': 8.025040814467186, 'init_value': -65.84135437011719, 'ave_value': -44.14262776782358} step=5130
2022-04-20 15:26.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.40 [info     ] TD3PlusBC_20220420152550: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00027032344661958034, 'time_algorithm_update': 0.0073848314452589605, 'critic_loss': 81.23622116847345, 'actor_loss': 2.557614923220629, 'time_step': 0.007700579905370522, 'td_error': 8.893943094180555, 'init_value': -70.40667724609375, 'ave_value': -46.53980373116554} step=5472
2022-04-20 15:26.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.43 [info     ] TD3PlusBC_20220420152550: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0002697573767768012, 'time_algorithm_update': 0.0071997412464074925, 'critic_loss': 87.15231744866622, 'actor_loss': 2.556718020411263, 'time_step': 0.007513297928704156, 'td_error': 9.305717907054447, 'init_value': -70.9905014038086, 'ave_value': -48.27527473491751} step=5814
2022-04-20 15:26.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.47 [info     ] TD3PlusBC_20220420152550: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0002687751201161167, 'time_algorithm_update': 0.007541800800122712, 'critic_loss': 92.5140600371779, 'actor_loss': 2.556964934220788, 'time_step': 0.00785504516802336, 'td_error': 9.602421883257719, 'init_value': -72.02178192138672, 'ave_value': -49.42568523871788} step=6156
2022-04-20 15:26.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.50 [info     ] TD3PlusBC_20220420152550: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0002810334601597479, 'time_algorithm_update': 0.0074934799071641, 'critic_loss': 97.87570194891322, 'actor_loss': 2.5573825766468605, 'time_step': 0.007819032808493453, 'td_error': 10.30766260366303, 'init_value': -74.56382751464844, 'ave_value': -51.133989420062484} step=6498
2022-04-20 15:26.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.53 [info     ] TD3PlusBC_20220420152550: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00028036142650403475, 'time_algorithm_update': 0.007920201758892215, 'critic_loss': 103.04053351195932, 'actor_loss': 2.5575937471891703, 'time_step': 0.00823839993504753, 'td_error': 10.275273852012763, 'init_value': -74.9841079711914, 'ave_value': -52.381189423056135} step=6840
2022-04-20 15:26.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.56 [info     ] TD3PlusBC_20220420152550: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00027248455069915596, 'time_algorithm_update': 0.007447043357536807, 'critic_loss': 108.42870005111249, 'actor_loss': 2.55820844187374, 'time_step': 0.007752320919817651, 'td_error': 11.276254321841114, 'init_value': -77.84165954589844, 'ave_value': -53.96502193071661} step=7182
2022-04-20 15:26.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.59 [info     ] TD3PlusBC_20220420152550: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0002687256238613909, 'time_algorithm_update': 0.007062939175388269, 'critic_loss': 113.23623826768663, 'actor_loss': 2.55857645419606, 'time_step': 0.0073671814991019624, 'td_error': 11.846664826844046, 'init_value': -80.7688217163086, 'ave_value': -55.31053114223067} step=7524
2022-04-20 15:26.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.02 [info     ] TD3PlusBC_20220420152550: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0002699595445777938, 'time_algorithm_update': 0.007327787360252693, 'critic_loss': 117.76502370555498, 'actor_loss': 2.559140686403241, 'time_step': 0.007630666097005208, 'td_error': 11.926170997078371, 'init_value': -79.41555786132812, 'ave_value': -56.39786447056191} step=7866
2022-04-20 15:27.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.05 [info     ] TD3PlusBC_20220420152550: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0002624521478574875, 'time_algorithm_update': 0.0069415694788882605, 'critic_loss': 122.59392021134583, 'actor_loss': 2.5599514420269527, 'time_step': 0.007237630280834889, 'td_error': 11.945354773837686, 'init_value': -80.0400390625, 'ave_value': -56.912839254826594} step=8208
2022-04-20 15:27.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.08 [info     ] TD3PlusBC_20220420152550: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0002747202477259943, 'time_algorithm_update': 0.007466650845711692, 'critic_loss': 126.97736213639466, 'actor_loss': 2.559594031662969, 'time_step': 0.007776501583077057, 'td_error': 12.502955851977259, 'init_value': -81.9747085571289, 'ave_value': -57.87660451164704} step=8550
2022-04-20 15:27.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.11 [info     ] TD3PlusBC_20220420152550: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.000265203721342031, 'time_algorithm_update': 0.0069328441954495616, 'critic_loss': 131.70848092419362, 'actor_loss': 2.560952153122216, 'time_step': 0.007234632620337414, 'td_error': 12.903428759188397, 'init_value': -79.85514831542969, 'ave_value': -58.95688442130078} step=8892
2022-04-20 15:27.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.14 [info     ] TD3PlusBC_20220420152550: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00027477253250211305, 'time_algorithm_update': 0.0075010263431839076, 'critic_loss': 135.90260785643818, 'actor_loss': 2.5618840365381965, 'time_step': 0.00781334840763382, 'td_error': 12.985331926865069, 'init_value': -80.50382232666016, 'ave_value': -59.72582001020813} step=9234
2022-04-20 15:27.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.17 [info     ] TD3PlusBC_20220420152550: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00026547490504750035, 'time_algorithm_update': 0.007591927260683294, 'critic_loss': 140.5279635379189, 'actor_loss': 2.5624441166370238, 'time_step': 0.007895914434689528, 'td_error': 13.02166888769585, 'init_value': -80.05635070800781, 'ave_value': -60.65662350459696} step=9576
2022-04-20 15:27.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.21 [info     ] TD3PlusBC_20220420152550: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0002763222532662732, 'time_algorithm_update': 0.007383978854843051, 'critic_loss': 144.0328536898072, 'actor_loss': 2.562332100338406, 'time_step': 0.007696079928972568, 'td_error': 13.779218862951982, 'init_value': -83.67167663574219, 'ave_value': -61.753140088316755} step=9918
2022-04-20 15:27.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.24 [info     ] TD3PlusBC_20220420152550: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0002878472121835452, 'time_algorithm_update': 0.007937867739047224, 'critic_loss': 147.94156044407896, 'actor_loss': 2.562260733710395, 'time_step': 0.008268046100237215, 'td_error': 14.64606417345607, 'init_value': -84.2865982055664, 'ave_value': -62.80066239802234} step=10260
2022-04-20 15:27.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.27 [info     ] TD3PlusBC_20220420152550: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00029003341295565777, 'time_algorithm_update': 0.0074042263086776285, 'critic_loss': 151.4688001041524, 'actor_loss': 2.562497908608955, 'time_step': 0.007731208327220894, 'td_error': 14.092592009546456, 'init_value': -82.36344146728516, 'ave_value': -62.94608036333645} step=10602
2022-04-20 15:27.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.30 [info     ] TD3PlusBC_20220420152550: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00027696500744735984, 'time_algorithm_update': 0.0076452221786766725, 'critic_loss': 154.84162907293666, 'actor_loss': 2.563146991339343, 'time_step': 0.00795650621603804, 'td_error': 14.196246399989137, 'init_value': -81.91639709472656, 'ave_value': -63.72114800657238} step=10944
2022-04-20 15:27.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.33 [info     ] TD3PlusBC_20220420152550: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00028086824026721263, 'time_algorithm_update': 0.007307038669697723, 'critic_loss': 157.82064745718972, 'actor_loss': 2.562702771515874, 'time_step': 0.007625768059178402, 'td_error': 14.418297209630108, 'init_value': -82.80490112304688, 'ave_value': -64.31719458769687} step=11286
2022-04-20 15:27.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.36 [info     ] TD3PlusBC_20220420152550: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00026693051321464675, 'time_algorithm_update': 0.00746981093758031, 'critic_loss': 160.3373149180273, 'actor_loss': 2.562973382180197, 'time_step': 0.007771750639753732, 'td_error': 14.77840998786924, 'init_value': -84.44493103027344, 'ave_value': -65.04373277904232} step=11628
2022-04-20 15:27.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.40 [info     ] TD3PlusBC_20220420152550: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00027919373317071566, 'time_algorithm_update': 0.007757146456088239, 'critic_loss': 162.90610791925798, 'actor_loss': 2.5636204613579645, 'time_step': 0.008075083208362959, 'td_error': 15.12146775411105, 'init_value': -83.85505676269531, 'ave_value': -65.38140403567888} step=11970
2022-04-20 15:27.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.43 [info     ] TD3PlusBC_20220420152550: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00026796575178179827, 'time_algorithm_update': 0.007231234109889694, 'critic_loss': 165.12501967580695, 'actor_loss': 2.5628320334250465, 'time_step': 0.007534234844453153, 'td_error': 15.296098510252735, 'init_value': -82.6849136352539, 'ave_value': -65.42025604499068} step=12312
2022-04-20 15:27.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.46 [info     ] TD3PlusBC_20220420152550: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00027007735960664803, 'time_algorithm_update': 0.0074659864804898085, 'critic_loss': 167.67716794822647, 'actor_loss': 2.5636599245127183, 'time_step': 0.007770275511936835, 'td_error': 15.701786463468025, 'init_value': -82.96994018554688, 'ave_value': -66.00033670174025} step=12654
2022-04-20 15:27.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.49 [info     ] TD3PlusBC_20220420152550: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0002671968170076783, 'time_algorithm_update': 0.007008760296113309, 'critic_loss': 169.5077863771316, 'actor_loss': 2.5636191925807306, 'time_step': 0.007310831058792203, 'td_error': 16.29371353826911, 'init_value': -84.56172180175781, 'ave_value': -66.80307946670179} step=12996
2022-04-20 15:27.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.52 [info     ] TD3PlusBC_20220420152550: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003186164543642635, 'time_algorithm_update': 0.008293914516069735, 'critic_loss': 171.2554287603724, 'actor_loss': 2.564145565032959, 'time_step': 0.008653377231798674, 'td_error': 16.214279678715172, 'init_value': -81.99232482910156, 'ave_value': -67.0202583867147} step=13338
2022-04-20 15:27.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.55 [info     ] TD3PlusBC_20220420152550: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003027086369475426, 'time_algorithm_update': 0.0074875243225989985, 'critic_loss': 172.66537622819868, 'actor_loss': 2.5642599222952858, 'time_step': 0.007832907096684327, 'td_error': 16.35116933412024, 'init_value': -81.06527709960938, 'ave_value': -66.71566617232118} step=13680
2022-04-20 15:27.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.58 [info     ] TD3PlusBC_20220420152550: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00029886326594659457, 'time_algorithm_update': 0.007417125311511302, 'critic_loss': 174.2659444307026, 'actor_loss': 2.564743991483722, 'time_step': 0.007757171552780776, 'td_error': 17.652165885065404, 'init_value': -81.9131851196289, 'ave_value': -67.34842471555882} step=14022
2022-04-20 15:27.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.02 [info     ] TD3PlusBC_20220420152550: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00030411823451170447, 'time_algorithm_update': 0.007816590760883532, 'critic_loss': 175.35391958136307, 'actor_loss': 2.5647406062187508, 'time_step': 0.00816242388117383, 'td_error': 17.2327983000157, 'init_value': -81.68199157714844, 'ave_value': -67.94174648544424} step=14364
2022-04-20 15:28.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.05 [info     ] TD3PlusBC_20220420152550: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00030387911880225466, 'time_algorithm_update': 0.007580771083720246, 'critic_loss': 176.3189727381656, 'actor_loss': 2.5647968283870766, 'time_step': 0.007925346580862302, 'td_error': 15.664206148333784, 'init_value': -78.8987808227539, 'ave_value': -67.39138329792264} step=14706
2022-04-20 15:28.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.08 [info     ] TD3PlusBC_20220420152550: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00031279332456532976, 'time_algorithm_update': 0.007789152407506753, 'critic_loss': 177.44693098012468, 'actor_loss': 2.564210091417993, 'time_step': 0.00814469585641783, 'td_error': 17.383774763304544, 'init_value': -79.77459716796875, 'ave_value': -68.29585198011586} step=15048
2022-04-20 15:28.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.11 [info     ] TD3PlusBC_20220420152550: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00031911281117221764, 'time_algorithm_update': 0.007368089162815384, 'critic_loss': 178.24085873609397, 'actor_loss': 2.5650648987084104, 'time_step': 0.007733563233537284, 'td_error': 17.577840326668124, 'init_value': -83.02015686035156, 'ave_value': -68.49328078347837} step=15390
2022-04-20 15:28.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.15 [info     ] TD3PlusBC_20220420152550: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003276512636775859, 'time_algorithm_update': 0.008130159991526465, 'critic_loss': 178.90470232601055, 'actor_loss': 2.564463852441799, 'time_step': 0.008507896585074084, 'td_error': 17.56651100725598, 'init_value': -80.25682830810547, 'ave_value': -68.48140778259936} step=15732
2022-04-20 15:28.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.18 [info     ] TD3PlusBC_20220420152550: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00032624166611342403, 'time_algorithm_update': 0.008017806978950723, 'critic_loss': 179.45104860004625, 'actor_loss': 2.5653906774799724, 'time_step': 0.008396318084315249, 'td_error': 17.595701812556605, 'init_value': -81.24275207519531, 'ave_value': -69.43912872076555} step=16074
2022-04-20 15:28.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.21 [info     ] TD3PlusBC_20220420152550: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003243336203502633, 'time_algorithm_update': 0.008039821658218116, 'critic_loss': 180.04664335195085, 'actor_loss': 2.564988530867281, 'time_step': 0.008415933240923965, 'td_error': 17.30638617414491, 'init_value': -81.89878845214844, 'ave_value': -69.36472263039501} step=16416
2022-04-20 15:28.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.25 [info     ] TD3PlusBC_20220420152550: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033644556302076195, 'time_algorithm_update': 0.008406089063276324, 'critic_loss': 180.26222316005774, 'actor_loss': 2.5649160446479304, 'time_step': 0.008789690614443773, 'td_error': 17.470352477827927, 'init_value': -78.41861724853516, 'ave_value': -68.78542990260145} step=16758
2022-04-20 15:28.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.28 [info     ] TD3PlusBC_20220420152550: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00033216099990041634, 'time_algorithm_update': 0.008012476023177655, 'critic_loss': 181.02234417653224, 'actor_loss': 2.5658288587603653, 'time_step': 0.008391767217401872, 'td_error': 17.705075832232396, 'init_value': -83.59843444824219, 'ave_value': -70.09369898542526} step=17100
2022-04-20 15:28.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420152550/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:28.29 [info     ] FQE_20220420152828: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012818445642310452, 'time_algorithm_update': 0.0052223090665886205, 'loss': 0.008291121103789613, 'time_step': 0.005408472325428423, 'init_value': -0.4258043169975281, 'ave_value': -0.40837260683377585, 'soft_opc': nan} step=166




2022-04-20 15:28.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.30 [info     ] FQE_20220420152828: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001221205814775214, 'time_algorithm_update': 0.004725240799317877, 'loss': 0.006685690074900995, 'time_step': 0.004902885620852551, 'init_value': -0.5830621123313904, 'ave_value': -0.503778173180448, 'soft_opc': nan} step=332




2022-04-20 15:28.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.31 [info     ] FQE_20220420152828: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013411763202713197, 'time_algorithm_update': 0.004875513444463891, 'loss': 0.0057343423734026325, 'time_step': 0.005068440035165074, 'init_value': -0.650952935218811, 'ave_value': -0.5441175145608885, 'soft_opc': nan} step=498




2022-04-20 15:28.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.32 [info     ] FQE_20220420152828: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012454641870705477, 'time_algorithm_update': 0.004686764923922987, 'loss': 0.00561443448953151, 'time_step': 0.004865156598837979, 'init_value': -0.7260831594467163, 'ave_value': -0.5705023581380243, 'soft_opc': nan} step=664




2022-04-20 15:28.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.33 [info     ] FQE_20220420152828: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012776506952492586, 'time_algorithm_update': 0.0047896396682923096, 'loss': 0.005174996467385755, 'time_step': 0.004974320710423481, 'init_value': -0.8011718988418579, 'ave_value': -0.6214181393921913, 'soft_opc': nan} step=830




2022-04-20 15:28.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.34 [info     ] FQE_20220420152828: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012138234563620694, 'time_algorithm_update': 0.003811563353940665, 'loss': 0.00493322426442012, 'time_step': 0.003987858094364764, 'init_value': -0.8344616889953613, 'ave_value': -0.6375703995392935, 'soft_opc': nan} step=996




2022-04-20 15:28.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.35 [info     ] FQE_20220420152828: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00012445449829101562, 'time_algorithm_update': 0.004908646445676505, 'loss': 0.004680576025934733, 'time_step': 0.0050892370292939335, 'init_value': -0.840839147567749, 'ave_value': -0.628069853245675, 'soft_opc': nan} step=1162




2022-04-20 15:28.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.36 [info     ] FQE_20220420152828: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001285937895257789, 'time_algorithm_update': 0.004944702228867864, 'loss': 0.004506144811489047, 'time_step': 0.00513542703835361, 'init_value': -0.8766201734542847, 'ave_value': -0.6480392451423246, 'soft_opc': nan} step=1328




2022-04-20 15:28.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.36 [info     ] FQE_20220420152828: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013505263500903026, 'time_algorithm_update': 0.004722881029887372, 'loss': 0.004380144413100578, 'time_step': 0.004917410482843238, 'init_value': -0.860753059387207, 'ave_value': -0.6322700413934014, 'soft_opc': nan} step=1494




2022-04-20 15:28.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.37 [info     ] FQE_20220420152828: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.000134073108075613, 'time_algorithm_update': 0.005192269761878324, 'loss': 0.004414252650009535, 'time_step': 0.005384189536772579, 'init_value': -0.9202499389648438, 'ave_value': -0.6761487236984812, 'soft_opc': nan} step=1660




2022-04-20 15:28.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.38 [info     ] FQE_20220420152828: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00013053417205810547, 'time_algorithm_update': 0.004836438650108245, 'loss': 0.004479567681618753, 'time_step': 0.0050243656319308, 'init_value': -0.9577565789222717, 'ave_value': -0.7117207707602295, 'soft_opc': nan} step=1826




2022-04-20 15:28.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.39 [info     ] FQE_20220420152828: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00013275606086455197, 'time_algorithm_update': 0.005049570497260036, 'loss': 0.0043393528693445385, 'time_step': 0.005239311471042863, 'init_value': -0.9578136801719666, 'ave_value': -0.7039437891160314, 'soft_opc': nan} step=1992




2022-04-20 15:28.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.40 [info     ] FQE_20220420152828: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012664191694144742, 'time_algorithm_update': 0.004827690411763019, 'loss': 0.004718168654776436, 'time_step': 0.005011435014655791, 'init_value': -1.009222149848938, 'ave_value': -0.7423216173173608, 'soft_opc': nan} step=2158




2022-04-20 15:28.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.41 [info     ] FQE_20220420152828: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001239288284117917, 'time_algorithm_update': 0.003738529710884554, 'loss': 0.005001538499757782, 'time_step': 0.003920978810413775, 'init_value': -1.0840481519699097, 'ave_value': -0.807566566310135, 'soft_opc': nan} step=2324




2022-04-20 15:28.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.42 [info     ] FQE_20220420152828: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012831802827766142, 'time_algorithm_update': 0.004836997353886983, 'loss': 0.005144803154321544, 'time_step': 0.005021916814597256, 'init_value': -1.070486068725586, 'ave_value': -0.8010917173555909, 'soft_opc': nan} step=2490




2022-04-20 15:28.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.43 [info     ] FQE_20220420152828: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012238054390413216, 'time_algorithm_update': 0.004646933222391519, 'loss': 0.005580869511734829, 'time_step': 0.00482419599969703, 'init_value': -1.1043739318847656, 'ave_value': -0.822265854093607, 'soft_opc': nan} step=2656




2022-04-20 15:28.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.44 [info     ] FQE_20220420152828: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001259223524346409, 'time_algorithm_update': 0.004847020988004753, 'loss': 0.00589313495662961, 'time_step': 0.005034093397209443, 'init_value': -1.1397719383239746, 'ave_value': -0.8490568765590119, 'soft_opc': nan} step=2822




2022-04-20 15:28.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.45 [info     ] FQE_20220420152828: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00013832873608692582, 'time_algorithm_update': 0.005008971834757242, 'loss': 0.006440916381978306, 'time_step': 0.005209193172225033, 'init_value': -1.1421570777893066, 'ave_value': -0.8556974392015118, 'soft_opc': nan} step=2988




2022-04-20 15:28.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.45 [info     ] FQE_20220420152828: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00013150364519601846, 'time_algorithm_update': 0.005056603845343532, 'loss': 0.0068035202440300795, 'time_step': 0.005248690225991858, 'init_value': -1.1545240879058838, 'ave_value': -0.8635484307568084, 'soft_opc': nan} step=3154




2022-04-20 15:28.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.46 [info     ] FQE_20220420152828: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012717189559017318, 'time_algorithm_update': 0.004693852849753506, 'loss': 0.006811892276444663, 'time_step': 0.004879049507968397, 'init_value': -1.1885462999343872, 'ave_value': -0.8717348119143296, 'soft_opc': nan} step=3320




2022-04-20 15:28.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.47 [info     ] FQE_20220420152828: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00012898014252444348, 'time_algorithm_update': 0.005101784166083278, 'loss': 0.008078637173825717, 'time_step': 0.005288362503051758, 'init_value': -1.2286107540130615, 'ave_value': -0.9171077514159586, 'soft_opc': nan} step=3486




2022-04-20 15:28.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.48 [info     ] FQE_20220420152828: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001268702817250447, 'time_algorithm_update': 0.0038028826196509673, 'loss': 0.008079568786036044, 'time_step': 0.0039842803794217395, 'init_value': -1.328941822052002, 'ave_value': -0.9892290047923656, 'soft_opc': nan} step=3652




2022-04-20 15:28.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.49 [info     ] FQE_20220420152828: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001243539603359728, 'time_algorithm_update': 0.0046553396316896005, 'loss': 0.009223313343638262, 'time_step': 0.0048357822808874655, 'init_value': -1.3595211505889893, 'ave_value': -1.0058791716650195, 'soft_opc': nan} step=3818




2022-04-20 15:28.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.50 [info     ] FQE_20220420152828: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001242218247379165, 'time_algorithm_update': 0.004740282713648784, 'loss': 0.009524434670968631, 'time_step': 0.004922217633350786, 'init_value': -1.3662126064300537, 'ave_value': -0.9951441215175028, 'soft_opc': nan} step=3984




2022-04-20 15:28.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.51 [info     ] FQE_20220420152828: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012794891035700418, 'time_algorithm_update': 0.00473574127059385, 'loss': 0.010437499122351902, 'time_step': 0.004922239177198295, 'init_value': -1.4397742748260498, 'ave_value': -1.057213174451881, 'soft_opc': nan} step=4150




2022-04-20 15:28.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.52 [info     ] FQE_20220420152828: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001225198607846915, 'time_algorithm_update': 0.004739254353994347, 'loss': 0.011095157867235816, 'time_step': 0.004916607615459396, 'init_value': -1.41849946975708, 'ave_value': -1.0078245583833574, 'soft_opc': nan} step=4316




2022-04-20 15:28.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.53 [info     ] FQE_20220420152828: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012479776359466184, 'time_algorithm_update': 0.004781152828630194, 'loss': 0.012272287176989288, 'time_step': 0.004964413413082261, 'init_value': -1.4630204439163208, 'ave_value': -1.0438214528768717, 'soft_opc': nan} step=4482




2022-04-20 15:28.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.53 [info     ] FQE_20220420152828: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012662037309393826, 'time_algorithm_update': 0.004727785845836961, 'loss': 0.012099364357271108, 'time_step': 0.004909947694066059, 'init_value': -1.5010528564453125, 'ave_value': -1.0878156424252534, 'soft_opc': nan} step=4648




2022-04-20 15:28.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.54 [info     ] FQE_20220420152828: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012756973864084267, 'time_algorithm_update': 0.004729477755994682, 'loss': 0.013002421964290944, 'time_step': 0.00491511390869876, 'init_value': -1.6245601177215576, 'ave_value': -1.1916278366528048, 'soft_opc': nan} step=4814




2022-04-20 15:28.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.55 [info     ] FQE_20220420152828: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012371913496270236, 'time_algorithm_update': 0.003899956324014319, 'loss': 0.014313759317296186, 'time_step': 0.00408529373536627, 'init_value': -1.6498379707336426, 'ave_value': -1.2206416426608255, 'soft_opc': nan} step=4980




2022-04-20 15:28.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.56 [info     ] FQE_20220420152828: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012617513357874858, 'time_algorithm_update': 0.004785622458860099, 'loss': 0.014868859990628952, 'time_step': 0.0049667071147137375, 'init_value': -1.723731279373169, 'ave_value': -1.2791490869210647, 'soft_opc': nan} step=5146




2022-04-20 15:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.57 [info     ] FQE_20220420152828: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012631157794630672, 'time_algorithm_update': 0.0046959224953708875, 'loss': 0.01550813949335905, 'time_step': 0.004880342138818948, 'init_value': -1.796449899673462, 'ave_value': -1.3538167123401723, 'soft_opc': nan} step=5312




2022-04-20 15:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.58 [info     ] FQE_20220420152828: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001283395721251706, 'time_algorithm_update': 0.0048514604568481445, 'loss': 0.016621704247684766, 'time_step': 0.0050363497561719045, 'init_value': -1.7643709182739258, 'ave_value': -1.3174021553432753, 'soft_opc': nan} step=5478




2022-04-20 15:28.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:28.59 [info     ] FQE_20220420152828: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012689038931605327, 'time_algorithm_update': 0.004788362836263266, 'loss': 0.017658740395668848, 'time_step': 0.00497760542904038, 'init_value': -1.8245820999145508, 'ave_value': -1.3552855255828986, 'soft_opc': nan} step=5644




2022-04-20 15:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.00 [info     ] FQE_20220420152828: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00012372057121920298, 'time_algorithm_update': 0.004722421427807176, 'loss': 0.018637972921750182, 'time_step': 0.0049030651529151275, 'init_value': -1.9527066946029663, 'ave_value': -1.4984621463192476, 'soft_opc': nan} step=5810




2022-04-20 15:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.00 [info     ] FQE_20220420152828: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012824765170913144, 'time_algorithm_update': 0.004867828035929117, 'loss': 0.020013412543437838, 'time_step': 0.005050369055874376, 'init_value': -1.9301130771636963, 'ave_value': -1.454705785813968, 'soft_opc': nan} step=5976




2022-04-20 15:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.01 [info     ] FQE_20220420152828: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00012958624276770167, 'time_algorithm_update': 0.004771913390561759, 'loss': 0.02106181421199342, 'time_step': 0.004959334810096097, 'init_value': -1.8692104816436768, 'ave_value': -1.387860309352746, 'soft_opc': nan} step=6142




2022-04-20 15:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.02 [info     ] FQE_20220420152828: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001215245350297675, 'time_algorithm_update': 0.00393040496182729, 'loss': 0.021794106070013964, 'time_step': 0.004107166485614087, 'init_value': -1.9923603534698486, 'ave_value': -1.4699664888005737, 'soft_opc': nan} step=6308




2022-04-20 15:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.03 [info     ] FQE_20220420152828: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00012532343347388577, 'time_algorithm_update': 0.004747520010155368, 'loss': 0.022694860843111234, 'time_step': 0.004933298352253006, 'init_value': -2.0343263149261475, 'ave_value': -1.5271600290545606, 'soft_opc': nan} step=6474




2022-04-20 15:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.04 [info     ] FQE_20220420152828: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00012531912470438393, 'time_algorithm_update': 0.004793823483478592, 'loss': 0.023775949508950383, 'time_step': 0.004981529281800051, 'init_value': -2.2142820358276367, 'ave_value': -1.7023095073832852, 'soft_opc': nan} step=6640




2022-04-20 15:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.05 [info     ] FQE_20220420152828: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012934351541909827, 'time_algorithm_update': 0.0047954091106552675, 'loss': 0.025273027105385953, 'time_step': 0.004984111670988152, 'init_value': -2.1324658393859863, 'ave_value': -1.6248092255924267, 'soft_opc': nan} step=6806




2022-04-20 15:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.06 [info     ] FQE_20220420152828: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013316826648022756, 'time_algorithm_update': 0.004721823945102921, 'loss': 0.02560688523121223, 'time_step': 0.004913444978645049, 'init_value': -2.126314640045166, 'ave_value': -1.6141962167018236, 'soft_opc': nan} step=6972




2022-04-20 15:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.07 [info     ] FQE_20220420152828: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00013351009552737316, 'time_algorithm_update': 0.004766546100018972, 'loss': 0.02615612723825342, 'time_step': 0.004959732653146766, 'init_value': -2.2165887355804443, 'ave_value': -1.7149971247415754, 'soft_opc': nan} step=7138




2022-04-20 15:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.07 [info     ] FQE_20220420152828: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012533205101288944, 'time_algorithm_update': 0.004686974617372076, 'loss': 0.027234817199347294, 'time_step': 0.004870420478912721, 'init_value': -2.2784974575042725, 'ave_value': -1.7717781602530867, 'soft_opc': nan} step=7304




2022-04-20 15:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.08 [info     ] FQE_20220420152828: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012914818453501505, 'time_algorithm_update': 0.004727179745593703, 'loss': 0.02769201120616958, 'time_step': 0.004912122186407985, 'init_value': -2.2094364166259766, 'ave_value': -1.716215911005867, 'soft_opc': nan} step=7470




2022-04-20 15:29.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.09 [info     ] FQE_20220420152828: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012251555201518968, 'time_algorithm_update': 0.004134165235312588, 'loss': 0.029054619028255433, 'time_step': 0.004313649901424546, 'init_value': -2.1784934997558594, 'ave_value': -1.6763372953981162, 'soft_opc': nan} step=7636




2022-04-20 15:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.10 [info     ] FQE_20220420152828: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00012339166848056288, 'time_algorithm_update': 0.004502553537667516, 'loss': 0.029448779986425113, 'time_step': 0.00468099835407303, 'init_value': -2.280982732772827, 'ave_value': -1.7665242237919891, 'soft_opc': nan} step=7802




2022-04-20 15:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.11 [info     ] FQE_20220420152828: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012243224913815418, 'time_algorithm_update': 0.0047722351120178955, 'loss': 0.030631481704908054, 'time_step': 0.004956592996436429, 'init_value': -2.3938088417053223, 'ave_value': -1.8509800519811006, 'soft_opc': nan} step=7968




2022-04-20 15:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.12 [info     ] FQE_20220420152828: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00012991083673683993, 'time_algorithm_update': 0.00485278755785471, 'loss': 0.03185587572789448, 'time_step': 0.005042188138846892, 'init_value': -2.407907485961914, 'ave_value': -1.8976982009110486, 'soft_opc': nan} step=8134




2022-04-20 15:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:29.13 [info     ] FQE_20220420152828: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012395180851580148, 'time_algorithm_update': 0.004759578819734505, 'loss': 0.032779642140930404, 'time_step': 0.0049392573804740445, 'init_value': -2.382652759552002, 'ave_value': -1.8481644003710769, 'soft_opc': nan} step=8300




2022-04-20 15:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152828/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:29.13 [debug    ] RoundIterator is selected.
2022-04-20 15:29.13 [info     ] Directory is created at d3rlpy_logs/FQE_20220420152913
2022-04-20 15:29.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:29.13 [debug    ] Building models...
2022-04-20 15:29.13 [debug    ] Models have been built.
2022-04-20 15:29.13 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420152913/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:29.15 [info     ] FQE_20220420152913: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013058407362117323, 'time_algorithm_update': 0.004739768283311711, 'loss': 0.029578788752862534, 'time_step': 0.00493083305137102, 'init_value': -1.36089026927948, 'ave_value': -1.3619438228574958, 'soft_opc': nan} step=344




2022-04-20 15:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.16 [info     ] FQE_20220420152913: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013123071470925974, 'time_algorithm_update': 0.004260846348695977, 'loss': 0.02644886788208211, 'time_step': 0.004448726426723392, 'init_value': -2.10296893119812, 'ave_value': -2.0784547195643994, 'soft_opc': nan} step=688




2022-04-20 15:29.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.18 [info     ] FQE_20220420152913: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001324054806731468, 'time_algorithm_update': 0.004716690889624662, 'loss': 0.02946954975608563, 'time_step': 0.00491043301515801, 'init_value': -3.031630039215088, 'ave_value': -2.9536096952788466, 'soft_opc': nan} step=1032




2022-04-20 15:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.20 [info     ] FQE_20220420152913: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013439045395962027, 'time_algorithm_update': 0.004762426365253537, 'loss': 0.03148838203010518, 'time_step': 0.004955144122589466, 'init_value': -3.662306308746338, 'ave_value': -3.4911359553133043, 'soft_opc': nan} step=1376




2022-04-20 15:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.22 [info     ] FQE_20220420152913: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00013366064359975416, 'time_algorithm_update': 0.004770767550135768, 'loss': 0.039682339032202266, 'time_step': 0.0049645304679870605, 'init_value': -4.422543525695801, 'ave_value': -4.141148694490528, 'soft_opc': nan} step=1720




2022-04-20 15:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.24 [info     ] FQE_20220420152913: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00012960822083229242, 'time_algorithm_update': 0.004224362068398054, 'loss': 0.04783934543212486, 'time_step': 0.00440954607586528, 'init_value': -5.126683712005615, 'ave_value': -4.746377054816279, 'soft_opc': nan} step=2064




2022-04-20 15:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.25 [info     ] FQE_20220420152913: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001323431037193121, 'time_algorithm_update': 0.004873183577559715, 'loss': 0.05709131846886648, 'time_step': 0.00506604272265767, 'init_value': -5.85068416595459, 'ave_value': -5.398456202473319, 'soft_opc': nan} step=2408




2022-04-20 15:29.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.27 [info     ] FQE_20220420152913: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.000131894682728967, 'time_algorithm_update': 0.004653789276300475, 'loss': 0.07253870628449287, 'time_step': 0.0048447528550791185, 'init_value': -6.385390281677246, 'ave_value': -5.846131892127157, 'soft_opc': nan} step=2752




2022-04-20 15:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.29 [info     ] FQE_20220420152913: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001326425130977187, 'time_algorithm_update': 0.004690842573032822, 'loss': 0.08500454557767181, 'time_step': 0.004882773687673169, 'init_value': -6.951218605041504, 'ave_value': -6.291033267018674, 'soft_opc': nan} step=3096




2022-04-20 15:29.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.31 [info     ] FQE_20220420152913: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001299318879149681, 'time_algorithm_update': 0.004211210234220638, 'loss': 0.1025458303615884, 'time_step': 0.004401176474815191, 'init_value': -7.691583633422852, 'ave_value': -6.975490928085013, 'soft_opc': nan} step=3440




2022-04-20 15:29.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.33 [info     ] FQE_20220420152913: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00013045585432717966, 'time_algorithm_update': 0.004779024179591689, 'loss': 0.12042204725155391, 'time_step': 0.004966055237969687, 'init_value': -8.31734848022461, 'ave_value': -7.546749891006065, 'soft_opc': nan} step=3784




2022-04-20 15:29.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.34 [info     ] FQE_20220420152913: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001358937385470368, 'time_algorithm_update': 0.004750282958496449, 'loss': 0.13836344952023652, 'time_step': 0.004944919153701427, 'init_value': -8.980788230895996, 'ave_value': -8.091790213221044, 'soft_opc': nan} step=4128




2022-04-20 15:29.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.36 [info     ] FQE_20220420152913: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00013217399286669353, 'time_algorithm_update': 0.004777046830155129, 'loss': 0.1568475891781841, 'time_step': 0.004968160806700241, 'init_value': -9.526439666748047, 'ave_value': -8.663998318396494, 'soft_opc': nan} step=4472




2022-04-20 15:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.38 [info     ] FQE_20220420152913: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00013120091238687205, 'time_algorithm_update': 0.004225558319757151, 'loss': 0.17757737505531243, 'time_step': 0.004414923662363097, 'init_value': -10.096731185913086, 'ave_value': -9.14702192326952, 'soft_opc': nan} step=4816




2022-04-20 15:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.40 [info     ] FQE_20220420152913: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00013035189273745515, 'time_algorithm_update': 0.00476551748985468, 'loss': 0.19450096586221005, 'time_step': 0.004953308853992196, 'init_value': -10.6134033203125, 'ave_value': -9.567689299487734, 'soft_opc': nan} step=5160




2022-04-20 15:29.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.42 [info     ] FQE_20220420152913: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013507036275641862, 'time_algorithm_update': 0.0047410518624061765, 'loss': 0.22349880907013145, 'time_step': 0.004932682874590852, 'init_value': -11.299420356750488, 'ave_value': -10.173169973156591, 'soft_opc': nan} step=5504




2022-04-20 15:29.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.43 [info     ] FQE_20220420152913: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00013684741286344306, 'time_algorithm_update': 0.004791045604750167, 'loss': 0.2393841291723643, 'time_step': 0.004991142555724743, 'init_value': -11.649627685546875, 'ave_value': -10.377072281883462, 'soft_opc': nan} step=5848




2022-04-20 15:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.45 [info     ] FQE_20220420152913: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00013002753257751465, 'time_algorithm_update': 0.004184282103250193, 'loss': 0.268319416655834, 'time_step': 0.004377749770186668, 'init_value': -12.202947616577148, 'ave_value': -10.869263144275294, 'soft_opc': nan} step=6192




2022-04-20 15:29.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.47 [info     ] FQE_20220420152913: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001311212085014166, 'time_algorithm_update': 0.0047534981439279955, 'loss': 0.29581923320412984, 'time_step': 0.004943289035974547, 'init_value': -12.794145584106445, 'ave_value': -11.279767346706215, 'soft_opc': nan} step=6536




2022-04-20 15:29.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.49 [info     ] FQE_20220420152913: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013457550558932993, 'time_algorithm_update': 0.004765125901200051, 'loss': 0.32681731219121885, 'time_step': 0.00495808484942414, 'init_value': -13.42170524597168, 'ave_value': -11.845123079288458, 'soft_opc': nan} step=6880




2022-04-20 15:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.51 [info     ] FQE_20220420152913: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013072060984234478, 'time_algorithm_update': 0.004681281572164491, 'loss': 0.34867192671612596, 'time_step': 0.004874663297520127, 'init_value': -13.962261199951172, 'ave_value': -12.353752896554981, 'soft_opc': nan} step=7224




2022-04-20 15:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.52 [info     ] FQE_20220420152913: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00012845355410908544, 'time_algorithm_update': 0.004264627085175625, 'loss': 0.3676898646612413, 'time_step': 0.004452642313269682, 'init_value': -14.138437271118164, 'ave_value': -12.450123837224767, 'soft_opc': nan} step=7568




2022-04-20 15:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.54 [info     ] FQE_20220420152913: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001315058663833973, 'time_algorithm_update': 0.004723950180896493, 'loss': 0.39074862396431176, 'time_step': 0.004915633173876031, 'init_value': -14.68195915222168, 'ave_value': -12.971944767797995, 'soft_opc': nan} step=7912




2022-04-20 15:29.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.56 [info     ] FQE_20220420152913: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00013113021850585938, 'time_algorithm_update': 0.0047873223936835, 'loss': 0.411099309989706, 'time_step': 0.0049767362516979835, 'init_value': -15.263853073120117, 'ave_value': -13.446336563414805, 'soft_opc': nan} step=8256




2022-04-20 15:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.58 [info     ] FQE_20220420152913: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00013119883315507755, 'time_algorithm_update': 0.004790453023688738, 'loss': 0.4326109949087854, 'time_step': 0.004982808994692426, 'init_value': -15.439637184143066, 'ave_value': -13.706125248757914, 'soft_opc': nan} step=8600




2022-04-20 15:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:29.59 [info     ] FQE_20220420152913: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012865662574768066, 'time_algorithm_update': 0.0042786466520886085, 'loss': 0.4590497989597362, 'time_step': 0.00446857962497445, 'init_value': -15.803810119628906, 'ave_value': -14.001890252199892, 'soft_opc': nan} step=8944




2022-04-20 15:29.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.01 [info     ] FQE_20220420152913: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001355090806650561, 'time_algorithm_update': 0.004845275435336801, 'loss': 0.4824204039543347, 'time_step': 0.00504113421883694, 'init_value': -16.1748046875, 'ave_value': -14.371379642063534, 'soft_opc': nan} step=9288




2022-04-20 15:30.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.03 [info     ] FQE_20220420152913: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001323340937148693, 'time_algorithm_update': 0.00474803322969481, 'loss': 0.5070598520728391, 'time_step': 0.004938816608384598, 'init_value': -16.367918014526367, 'ave_value': -14.447322840938806, 'soft_opc': nan} step=9632




2022-04-20 15:30.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.05 [info     ] FQE_20220420152913: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013305558714755747, 'time_algorithm_update': 0.004723683346149533, 'loss': 0.527539596589672, 'time_step': 0.004915660896966624, 'init_value': -16.585342407226562, 'ave_value': -14.617039546345532, 'soft_opc': nan} step=9976




2022-04-20 15:30.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.07 [info     ] FQE_20220420152913: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001253603502761486, 'time_algorithm_update': 0.004274947698726211, 'loss': 0.5566730124464389, 'time_step': 0.0044576726680578185, 'init_value': -17.238922119140625, 'ave_value': -15.020026082894994, 'soft_opc': nan} step=10320




2022-04-20 15:30.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.08 [info     ] FQE_20220420152913: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00013075041216473247, 'time_algorithm_update': 0.004702038543168889, 'loss': 0.5890905235753243, 'time_step': 0.004891758741334427, 'init_value': -17.677135467529297, 'ave_value': -15.567405195160887, 'soft_opc': nan} step=10664




2022-04-20 15:30.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.10 [info     ] FQE_20220420152913: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013510709585145463, 'time_algorithm_update': 0.004765870959259743, 'loss': 0.608123256575836, 'time_step': 0.004962426285411037, 'init_value': -18.15237808227539, 'ave_value': -16.0915524510758, 'soft_opc': nan} step=11008




2022-04-20 15:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.12 [info     ] FQE_20220420152913: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013560056686401367, 'time_algorithm_update': 0.0046938948853071346, 'loss': 0.6084097099468805, 'time_step': 0.0048884728620218676, 'init_value': -17.912851333618164, 'ave_value': -15.913707460499335, 'soft_opc': nan} step=11352




2022-04-20 15:30.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.14 [info     ] FQE_20220420152913: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013060278670732364, 'time_algorithm_update': 0.004245600728101508, 'loss': 0.6258002705005712, 'time_step': 0.004434002000232076, 'init_value': -18.349708557128906, 'ave_value': -16.409269601260906, 'soft_opc': nan} step=11696




2022-04-20 15:30.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.16 [info     ] FQE_20220420152913: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013095625611238702, 'time_algorithm_update': 0.0046974829463071605, 'loss': 0.6319158957772997, 'time_step': 0.004887037499006404, 'init_value': -18.281312942504883, 'ave_value': -16.463354233586543, 'soft_opc': nan} step=12040




2022-04-20 15:30.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.17 [info     ] FQE_20220420152913: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001349844211755797, 'time_algorithm_update': 0.00500024959098461, 'loss': 0.6488977346547641, 'time_step': 0.0051951664824818455, 'init_value': -18.603042602539062, 'ave_value': -16.69223595632202, 'soft_opc': nan} step=12384




2022-04-20 15:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.19 [info     ] FQE_20220420152913: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001322952813880388, 'time_algorithm_update': 0.004694348157838334, 'loss': 0.6666877090302837, 'time_step': 0.004889565844868504, 'init_value': -18.821746826171875, 'ave_value': -17.02147627772364, 'soft_opc': nan} step=12728




2022-04-20 15:30.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.21 [info     ] FQE_20220420152913: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013137626093487407, 'time_algorithm_update': 0.0042540195376374, 'loss': 0.6707266169968386, 'time_step': 0.004442788140718327, 'init_value': -18.619548797607422, 'ave_value': -17.155055309424807, 'soft_opc': nan} step=13072




2022-04-20 15:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.23 [info     ] FQE_20220420152913: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00013210052667662154, 'time_algorithm_update': 0.004783705916515616, 'loss': 0.6839739166654993, 'time_step': 0.004976673181666885, 'init_value': -19.073440551757812, 'ave_value': -17.542678107989026, 'soft_opc': nan} step=13416




2022-04-20 15:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.25 [info     ] FQE_20220420152913: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013402936070464378, 'time_algorithm_update': 0.004804180805073228, 'loss': 0.7045395141142572, 'time_step': 0.004999575919883196, 'init_value': -18.862674713134766, 'ave_value': -17.608768011679214, 'soft_opc': nan} step=13760




2022-04-20 15:30.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.26 [info     ] FQE_20220420152913: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013731939848079237, 'time_algorithm_update': 0.004740466905194659, 'loss': 0.7183080352842808, 'time_step': 0.004940127910569657, 'init_value': -19.28736114501953, 'ave_value': -18.114313896807978, 'soft_opc': nan} step=14104




2022-04-20 15:30.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.28 [info     ] FQE_20220420152913: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013442649397739145, 'time_algorithm_update': 0.004347533680671869, 'loss': 0.7370420219051803, 'time_step': 0.004543639892755553, 'init_value': -19.12390899658203, 'ave_value': -18.15565410891186, 'soft_opc': nan} step=14448




2022-04-20 15:30.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.30 [info     ] FQE_20220420152913: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001393882341163103, 'time_algorithm_update': 0.004786745753399161, 'loss': 0.7353771196916526, 'time_step': 0.004989540161088456, 'init_value': -18.53771209716797, 'ave_value': -17.611106032294195, 'soft_opc': nan} step=14792




2022-04-20 15:30.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.32 [info     ] FQE_20220420152913: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00013816772505294446, 'time_algorithm_update': 0.004813664874365163, 'loss': 0.7153871289216155, 'time_step': 0.005012857359509135, 'init_value': -18.591617584228516, 'ave_value': -17.872676148042117, 'soft_opc': nan} step=15136




2022-04-20 15:30.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.34 [info     ] FQE_20220420152913: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013558254685512808, 'time_algorithm_update': 0.004711408254712127, 'loss': 0.7171066860928265, 'time_step': 0.004907607339149298, 'init_value': -17.945337295532227, 'ave_value': -17.467116247634937, 'soft_opc': nan} step=15480




2022-04-20 15:30.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.35 [info     ] FQE_20220420152913: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00012944465459779252, 'time_algorithm_update': 0.00425518460051958, 'loss': 0.7107983191800845, 'time_step': 0.004445568766704825, 'init_value': -17.64330291748047, 'ave_value': -17.369930411072744, 'soft_opc': nan} step=15824




2022-04-20 15:30.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.37 [info     ] FQE_20220420152913: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013471620027409045, 'time_algorithm_update': 0.004711017359134762, 'loss': 0.715357863734108, 'time_step': 0.0049059203890867015, 'init_value': -17.388736724853516, 'ave_value': -17.3843294888105, 'soft_opc': nan} step=16168




2022-04-20 15:30.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.39 [info     ] FQE_20220420152913: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013465451639752056, 'time_algorithm_update': 0.00475527380788049, 'loss': 0.7006440944319894, 'time_step': 0.004949858715367871, 'init_value': -17.08340835571289, 'ave_value': -17.399307139391507, 'soft_opc': nan} step=16512




2022-04-20 15:30.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.41 [info     ] FQE_20220420152913: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001365632511848627, 'time_algorithm_update': 0.004803625650184099, 'loss': 0.7026125874781851, 'time_step': 0.005001501288524893, 'init_value': -16.817840576171875, 'ave_value': -17.396747431957774, 'soft_opc': nan} step=16856




2022-04-20 15:30.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:30.42 [info     ] FQE_20220420152913: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00013149408406989518, 'time_algorithm_update': 0.004217050796331361, 'loss': 0.7041210884358301, 'time_step': 0.004406504852827205, 'init_value': -16.87640953063965, 'ave_value': -17.875232020669422, 'soft_opc': nan} step=17200




2022-04-20 15:30.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420152913/model_17200.pt
most optimal hyper params for td3+bc at this point:  [0.0037935400354651636, 0.004050689617073905, 2.8020594664729005e-05, 7]
search iteration:  1
using hyper params:  [0.00631897278667268, 0.005207526973925864, 1.879259776420574e-05, 7]
2022-04-20 15:30.43 [debug    ] RoundIterator is selected.
2022-04-20 15:30.43 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420153043
2022-04-20 15:30.43 [debug    ] Fitting scaler...              scaler=standard


  mean = torch.tensor(self._mean, dtype=torch.float32, device=x.device)
  std = torch.tensor(self._std, dtype=torch.float32, device=x.device)
  minimum = torch.tensor(
  maximum = torch.tensor(


2022-04-20 15:30.43 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:30.43 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:30.43 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00631897278667268, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha': 2.5, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.005207526973925864, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.46 [info     ] TD3PlusBC_20220420153043: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00032853034504672936, 'time_algorithm_update': 0.008060124185350206, 'critic_loss': 11.008790835651041, 'actor_loss': 2.655721456683867, 'time_step': 0.008451530110766317, 'td_error': 1.040045959193644, 'init_value': -11.441760063171387, 'ave_value': -7.283444395279806} step=342
2022-04-20 15:30.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.49 [info     ] TD3PlusBC_20220420153043: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003286683768556829, 'time_algorithm_update': 0.008015086079201502, 'critic_loss': 6.154569234415802, 'actor_loss': 2.5735439752277576, 'time_step': 0.008406496884530051, 'td_error': 1.2987105863100055, 'init_value': -15.734646797180176, 'ave_value': -10.066361052592427} step=684
2022-04-20 15:30.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.53 [info     ] TD3PlusBC_20220420153043: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033235340787653335, 'time_algorithm_update': 0.008156209661249528, 'critic_loss': 9.576332606767354, 'actor_loss': 2.566226957834255, 'time_step': 0.008550468941181028, 'td_error': 1.700618310901373, 'init_value': -20.861980438232422, 'ave_value': -13.389161346236094} step=1026
2022-04-20 15:30.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.56 [info     ] TD3PlusBC_20220420153043: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003344566501372042, 'time_algorithm_update': 0.00804341745655439, 'critic_loss': 13.961988139570805, 'actor_loss': 2.5649513640598944, 'time_step': 0.008436470003853067, 'td_error': 2.205738428971758, 'init_value': -25.972660064697266, 'ave_value': -16.819003071591744} step=1368
2022-04-20 15:30.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.59 [info     ] TD3PlusBC_20220420153043: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003288391737910042, 'time_algorithm_update': 0.00772868820101197, 'critic_loss': 18.762006906040927, 'actor_loss': 2.5633170953270983, 'time_step': 0.008117921171132584, 'td_error': 2.720175609998351, 'init_value': -30.300024032592773, 'ave_value': -19.821425529655556} step=1710
2022-04-20 15:30.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.03 [info     ] TD3PlusBC_20220420153043: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00032859029825667887, 'time_algorithm_update': 0.008153094185723199, 'critic_loss': 23.97487735469439, 'actor_loss': 2.561679873550147, 'time_step': 0.008542256048548292, 'td_error': 3.231959760899839, 'init_value': -36.235755920410156, 'ave_value': -23.378156370143195} step=2052
2022-04-20 15:31.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.06 [info     ] TD3PlusBC_20220420153043: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032520154763383477, 'time_algorithm_update': 0.007611465035823353, 'critic_loss': 29.529206261997334, 'actor_loss': 2.560871762838977, 'time_step': 0.007995189281932095, 'td_error': 3.761328919295722, 'init_value': -39.55073547363281, 'ave_value': -25.8612364878164} step=2394
2022-04-20 15:31.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.09 [info     ] TD3PlusBC_20220420153043: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003326566595780222, 'time_algorithm_update': 0.00812718603346083, 'critic_loss': 35.13382655138161, 'actor_loss': 2.5604560681950974, 'time_step': 0.008522157780608238, 'td_error': 4.168012342657221, 'init_value': -43.65203094482422, 'ave_value': -28.507244438567692} step=2736
2022-04-20 15:31.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.13 [info     ] TD3PlusBC_20220420153043: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000327110987657692, 'time_algorithm_update': 0.00829670582598413, 'critic_loss': 41.33485690334387, 'actor_loss': 2.559711691928886, 'time_step': 0.008680906212120726, 'td_error': 4.896236782444006, 'init_value': -48.255027770996094, 'ave_value': -31.651811374968908} step=3078
2022-04-20 15:31.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.16 [info     ] TD3PlusBC_20220420153043: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003299162401790507, 'time_algorithm_update': 0.007876633203517624, 'critic_loss': 47.32452282152678, 'actor_loss': 2.558971689458479, 'time_step': 0.008263030247381556, 'td_error': 5.362016673476745, 'init_value': -50.707008361816406, 'ave_value': -33.7782719980812} step=3420
2022-04-20 15:31.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.19 [info     ] TD3PlusBC_20220420153043: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032763522967957614, 'time_algorithm_update': 0.008061949969732274, 'critic_loss': 53.63904128716006, 'actor_loss': 2.5585649348141852, 'time_step': 0.008450794638248911, 'td_error': 6.062003062196023, 'init_value': -55.04575729370117, 'ave_value': -36.22898162920842} step=3762
2022-04-20 15:31.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.23 [info     ] TD3PlusBC_20220420153043: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00032263750221297056, 'time_algorithm_update': 0.0075375364537824665, 'critic_loss': 59.74681578184429, 'actor_loss': 2.5588698289547747, 'time_step': 0.007917933993869357, 'td_error': 6.563170257677879, 'init_value': -59.2025260925293, 'ave_value': -38.70760613200666} step=4104
2022-04-20 15:31.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.26 [info     ] TD3PlusBC_20220420153043: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00032882453405369094, 'time_algorithm_update': 0.008001298235173811, 'critic_loss': 65.82713679263466, 'actor_loss': 2.558596033799021, 'time_step': 0.008388155385067588, 'td_error': 7.003938912902303, 'init_value': -60.95738983154297, 'ave_value': -40.50381527108082} step=4446
2022-04-20 15:31.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.29 [info     ] TD3PlusBC_20220420153043: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003305464460138689, 'time_algorithm_update': 0.007180678217034591, 'critic_loss': 71.79660639846534, 'actor_loss': 2.557183914017259, 'time_step': 0.0075703427108407715, 'td_error': 7.47410653638611, 'init_value': -63.283103942871094, 'ave_value': -42.65851696348212} step=4788
2022-04-20 15:31.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.31 [info     ] TD3PlusBC_20220420153043: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00032331232439007673, 'time_algorithm_update': 0.0055616891872116, 'critic_loss': 77.42908572592931, 'actor_loss': 2.5585425374103568, 'time_step': 0.005941059854295518, 'td_error': 8.423973452471012, 'init_value': -66.7295913696289, 'ave_value': -44.513673413854576} step=5130
2022-04-20 15:31.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.34 [info     ] TD3PlusBC_20220420153043: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00032107383884184544, 'time_algorithm_update': 0.005506568484836155, 'critic_loss': 83.61646344368918, 'actor_loss': 2.5576352292334126, 'time_step': 0.005887990806534973, 'td_error': 8.973920875945476, 'init_value': -69.03902435302734, 'ave_value': -46.45852295233818} step=5472
2022-04-20 15:31.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.36 [info     ] TD3PlusBC_20220420153043: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00032165873120402734, 'time_algorithm_update': 0.00554554992251926, 'critic_loss': 89.40102541516399, 'actor_loss': 2.5586886266518754, 'time_step': 0.005924429112707662, 'td_error': 9.394560629520901, 'init_value': -71.21058654785156, 'ave_value': -48.09818829245538} step=5814
2022-04-20 15:31.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.38 [info     ] TD3PlusBC_20220420153043: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00032274137463485984, 'time_algorithm_update': 0.00561329007845873, 'critic_loss': 95.13011159394917, 'actor_loss': 2.5586991825996086, 'time_step': 0.005992952843158566, 'td_error': 9.680236424381444, 'init_value': -72.4551010131836, 'ave_value': -49.711221422950004} step=6156
2022-04-20 15:31.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.41 [info     ] TD3PlusBC_20220420153043: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.000319322647407041, 'time_algorithm_update': 0.005526162727534423, 'critic_loss': 101.02292794790881, 'actor_loss': 2.5591915309080604, 'time_step': 0.005898858371533845, 'td_error': 10.302268698629883, 'init_value': -75.26425170898438, 'ave_value': -51.12890690190122} step=6498
2022-04-20 15:31.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.43 [info     ] TD3PlusBC_20220420153043: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00031902218422694514, 'time_algorithm_update': 0.005436318659642983, 'critic_loss': 106.28166465313114, 'actor_loss': 2.5594896447588824, 'time_step': 0.005798435350607711, 'td_error': 10.554420289623483, 'init_value': -74.57975006103516, 'ave_value': -52.1522145875686} step=6840
2022-04-20 15:31.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.45 [info     ] TD3PlusBC_20220420153043: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003196258991085298, 'time_algorithm_update': 0.005379577826338204, 'critic_loss': 111.28985361467328, 'actor_loss': 2.559190172898142, 'time_step': 0.005744100314134743, 'td_error': 11.446576696894017, 'init_value': -78.59065246582031, 'ave_value': -54.20863254058569} step=7182
2022-04-20 15:31.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.48 [info     ] TD3PlusBC_20220420153043: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003178586736757156, 'time_algorithm_update': 0.005465512387236656, 'critic_loss': 116.55903984649837, 'actor_loss': 2.5605314140431363, 'time_step': 0.005828264163948639, 'td_error': 10.652198614020104, 'init_value': -77.38838195800781, 'ave_value': -54.652174273898474} step=7524
2022-04-20 15:31.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.50 [info     ] TD3PlusBC_20220420153043: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00031773040169163754, 'time_algorithm_update': 0.005541462647287469, 'critic_loss': 121.30593655681052, 'actor_loss': 2.56065957727488, 'time_step': 0.005905445556194462, 'td_error': 12.119842980700112, 'init_value': -78.03540802001953, 'ave_value': -55.90990833925973} step=7866
2022-04-20 15:31.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.52 [info     ] TD3PlusBC_20220420153043: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.000322432545890585, 'time_algorithm_update': 0.005567500465794613, 'critic_loss': 125.95270634255213, 'actor_loss': 2.5614913444072878, 'time_step': 0.0059372312144229285, 'td_error': 13.003797642639356, 'init_value': -80.73804473876953, 'ave_value': -57.11140610842756} step=8208
2022-04-20 15:31.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.55 [info     ] TD3PlusBC_20220420153043: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00031937911496524924, 'time_algorithm_update': 0.005600817022267838, 'critic_loss': 130.82405938042535, 'actor_loss': 2.562420170209561, 'time_step': 0.005967464363365842, 'td_error': 13.174447196428156, 'init_value': -80.63134002685547, 'ave_value': -58.161337186602694} step=8550
2022-04-20 15:31.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.57 [info     ] TD3PlusBC_20220420153043: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032216484783685693, 'time_algorithm_update': 0.0055582376948574135, 'critic_loss': 135.33357591238635, 'actor_loss': 2.56320541783383, 'time_step': 0.005931168271784196, 'td_error': 13.24653952429455, 'init_value': -81.60540771484375, 'ave_value': -59.047513444052655} step=8892
2022-04-20 15:31.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.59 [info     ] TD3PlusBC_20220420153043: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003249408208835892, 'time_algorithm_update': 0.005599208742554425, 'critic_loss': 139.02283310471918, 'actor_loss': 2.562861470451132, 'time_step': 0.005974061307851334, 'td_error': 13.545706352746135, 'init_value': -82.79148864746094, 'ave_value': -60.128157295337665} step=9234
2022-04-20 15:31.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.02 [info     ] TD3PlusBC_20220420153043: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00032533888231244, 'time_algorithm_update': 0.005631594630012735, 'critic_loss': 143.07554510462353, 'actor_loss': 2.562935134820771, 'time_step': 0.0060055478971604015, 'td_error': 13.887849004317342, 'init_value': -83.38858032226562, 'ave_value': -61.15520205597838} step=9576
2022-04-20 15:32.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.04 [info     ] TD3PlusBC_20220420153043: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003263552983601888, 'time_algorithm_update': 0.00552734296921401, 'critic_loss': 146.92576891358135, 'actor_loss': 2.5621509789026273, 'time_step': 0.005907833924767567, 'td_error': 13.674087230822723, 'init_value': -80.24039459228516, 'ave_value': -61.37742385321171} step=9918
2022-04-20 15:32.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.06 [info     ] TD3PlusBC_20220420153043: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00032305368903087593, 'time_algorithm_update': 0.005584405179609332, 'critic_loss': 149.98046810306303, 'actor_loss': 2.562730690192061, 'time_step': 0.0059566295634933385, 'td_error': 14.577068294040384, 'init_value': -85.7322998046875, 'ave_value': -62.98524780395835} step=10260
2022-04-20 15:32.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.09 [info     ] TD3PlusBC_20220420153043: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00031804968739113613, 'time_algorithm_update': 0.005519624341998184, 'critic_loss': 152.91485961557132, 'actor_loss': 2.563063960326345, 'time_step': 0.0058897684889230115, 'td_error': 14.027769197171741, 'init_value': -81.64189147949219, 'ave_value': -62.400008894344204} step=10602
2022-04-20 15:32.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.11 [info     ] TD3PlusBC_20220420153043: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003189838420577914, 'time_algorithm_update': 0.0055049134973894085, 'critic_loss': 156.10464176378753, 'actor_loss': 2.5638455410449827, 'time_step': 0.005873674537703308, 'td_error': 15.119225556767278, 'init_value': -82.77723693847656, 'ave_value': -63.62206944793748} step=10944
2022-04-20 15:32.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.14 [info     ] TD3PlusBC_20220420153043: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003202609848557857, 'time_algorithm_update': 0.005591796852691829, 'critic_loss': 158.54506560654667, 'actor_loss': 2.5637673467223405, 'time_step': 0.005957187267771938, 'td_error': 15.097167790066448, 'init_value': -84.0660629272461, 'ave_value': -64.22646432033699} step=11286
2022-04-20 15:32.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.16 [info     ] TD3PlusBC_20220420153043: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003248962045413012, 'time_algorithm_update': 0.005651025744209512, 'critic_loss': 161.50603906731857, 'actor_loss': 2.5636116621787086, 'time_step': 0.006022198855528358, 'td_error': 15.920579938474342, 'init_value': -84.14830017089844, 'ave_value': -64.9571512581596} step=11628
2022-04-20 15:32.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.18 [info     ] TD3PlusBC_20220420153043: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.000327946846945244, 'time_algorithm_update': 0.005622718766418814, 'critic_loss': 162.87935814662288, 'actor_loss': 2.564730847788136, 'time_step': 0.0060040943803843, 'td_error': 14.343611704257928, 'init_value': -81.95002746582031, 'ave_value': -64.75490139593748} step=11970
2022-04-20 15:32.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.21 [info     ] TD3PlusBC_20220420153043: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00031955409468266004, 'time_algorithm_update': 0.005648561388428448, 'critic_loss': 164.8697341115851, 'actor_loss': 2.562980826138056, 'time_step': 0.006015443662453813, 'td_error': 16.155192414041572, 'init_value': -86.21226501464844, 'ave_value': -66.92061817439743} step=12312
2022-04-20 15:32.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.23 [info     ] TD3PlusBC_20220420153043: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.000322393506591083, 'time_algorithm_update': 0.005553328502945035, 'critic_loss': 167.2927523607399, 'actor_loss': 2.563640708811799, 'time_step': 0.0059237528962698596, 'td_error': 15.341519765827599, 'init_value': -86.40284729003906, 'ave_value': -66.82834419817387} step=12654
2022-04-20 15:32.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.26 [info     ] TD3PlusBC_20220420153043: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033035264377705533, 'time_algorithm_update': 0.005742700476395457, 'critic_loss': 168.38423263817504, 'actor_loss': 2.564262268836038, 'time_step': 0.006123374777230603, 'td_error': 15.392477382646899, 'init_value': -83.84339904785156, 'ave_value': -66.66155749607479} step=12996
2022-04-20 15:32.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.28 [info     ] TD3PlusBC_20220420153043: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003218943612617359, 'time_algorithm_update': 0.0055564920804653944, 'critic_loss': 169.23256502653422, 'actor_loss': 2.564096938796908, 'time_step': 0.005925058621412132, 'td_error': 14.537936440643232, 'init_value': -82.79840087890625, 'ave_value': -66.63178571844239} step=13338
2022-04-20 15:32.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.30 [info     ] TD3PlusBC_20220420153043: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003237536079005191, 'time_algorithm_update': 0.005593972596508717, 'critic_loss': 171.136370073285, 'actor_loss': 2.564696926819651, 'time_step': 0.005969673569439448, 'td_error': 16.292028587781417, 'init_value': -86.15247344970703, 'ave_value': -67.88725974112799} step=13680
2022-04-20 15:32.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.33 [info     ] TD3PlusBC_20220420153043: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00032738495988455434, 'time_algorithm_update': 0.005604506236070778, 'critic_loss': 172.15450041475353, 'actor_loss': 2.5644907337880274, 'time_step': 0.005986156519393475, 'td_error': 16.955116524985776, 'init_value': -85.41849517822266, 'ave_value': -67.86877083255465} step=14022
2022-04-20 15:32.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.35 [info     ] TD3PlusBC_20220420153043: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00032236074146471525, 'time_algorithm_update': 0.005505135184840152, 'critic_loss': 173.45293216816864, 'actor_loss': 2.565287854936388, 'time_step': 0.00587819542801171, 'td_error': 17.24414170632037, 'init_value': -86.84205627441406, 'ave_value': -69.04005236133882} step=14364
2022-04-20 15:32.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.37 [info     ] TD3PlusBC_20220420153043: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032381356111046863, 'time_algorithm_update': 0.005647715072185673, 'critic_loss': 174.7893072652538, 'actor_loss': 2.5658121861909566, 'time_step': 0.006022473524885568, 'td_error': 16.791660534844635, 'init_value': -85.5512466430664, 'ave_value': -68.67230910543587} step=14706
2022-04-20 15:32.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.40 [info     ] TD3PlusBC_20220420153043: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003188255934687386, 'time_algorithm_update': 0.005533831858495522, 'critic_loss': 176.10713349727163, 'actor_loss': 2.566231556106032, 'time_step': 0.0059004408574243735, 'td_error': 17.44875659689469, 'init_value': -84.54914855957031, 'ave_value': -68.76355307041474} step=15048
2022-04-20 15:32.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.42 [info     ] TD3PlusBC_20220420153043: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.000322368409898546, 'time_algorithm_update': 0.005687296041968273, 'critic_loss': 176.58665937011006, 'actor_loss': 2.5655271700251174, 'time_step': 0.0060577204352930975, 'td_error': 18.09275840636864, 'init_value': -83.67112731933594, 'ave_value': -68.86488269585713} step=15390
2022-04-20 15:32.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.45 [info     ] TD3PlusBC_20220420153043: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003218873899582534, 'time_algorithm_update': 0.005503581281293903, 'critic_loss': 177.77761189421716, 'actor_loss': 2.5660654773489076, 'time_step': 0.0058759123261211904, 'td_error': 18.789268388878533, 'init_value': -82.37760162353516, 'ave_value': -69.04425022397594} step=15732
2022-04-20 15:32.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.47 [info     ] TD3PlusBC_20220420153043: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003211581916139837, 'time_algorithm_update': 0.005522523707116557, 'critic_loss': 178.35247378879123, 'actor_loss': 2.5658379493401067, 'time_step': 0.005892033465424476, 'td_error': 16.827081602241527, 'init_value': -83.62242126464844, 'ave_value': -69.33591222579902} step=16074
2022-04-20 15:32.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.49 [info     ] TD3PlusBC_20220420153043: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003373434669093082, 'time_algorithm_update': 0.0057497589211714896, 'critic_loss': 179.24763696235522, 'actor_loss': 2.5677964394552664, 'time_step': 0.006137335509584661, 'td_error': 19.018566470551033, 'init_value': -85.95694732666016, 'ave_value': -69.9402621251583} step=16416
2022-04-20 15:32.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.52 [info     ] TD3PlusBC_20220420153043: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032451417711046006, 'time_algorithm_update': 0.005662080140141716, 'critic_loss': 179.88445047746626, 'actor_loss': 2.567325823488291, 'time_step': 0.0060365478894863905, 'td_error': 17.619753324836008, 'init_value': -83.31379699707031, 'ave_value': -69.56746341028226} step=16758
2022-04-20 15:32.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:32.54 [info     ] TD3PlusBC_20220420153043: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032214114540501644, 'time_algorithm_update': 0.005489903583861234, 'critic_loss': 180.72017593829952, 'actor_loss': 2.567146306846574, 'time_step': 0.005862074985838773, 'td_error': 18.827720442957812, 'init_value': -85.71229553222656, 'ave_value': -70.86811224574659} step=17100
2022-04-20 15:32.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153043/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:32.55 [info     ] FQE_20220420153254: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00011843945606645332, 'time_algorithm_update': 0.003048616719533162, 'loss': 0.008564994432009667, 'time_step': 0.0032224482800587116, 'init_value': -0.456485390663147, 'ave_value': -0.3767466452610385, 'soft_opc': nan} step=166




2022-04-20 15:32.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.56 [info     ] FQE_20220420153254: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012548573045845492, 'time_algorithm_update': 0.0033697148403489447, 'loss': 0.0064641932538901286, 'time_step': 0.0035502048860113307, 'init_value': -0.5939079523086548, 'ave_value': -0.4545934709812607, 'soft_opc': nan} step=332




2022-04-20 15:32.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.56 [info     ] FQE_20220420153254: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001194577619253871, 'time_algorithm_update': 0.0031483044107276275, 'loss': 0.005588185046350651, 'time_step': 0.003321957875447101, 'init_value': -0.6358722448348999, 'ave_value': -0.47815665682559616, 'soft_opc': nan} step=498




2022-04-20 15:32.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.57 [info     ] FQE_20220420153254: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012846596269722445, 'time_algorithm_update': 0.0032714102641645685, 'loss': 0.005282509036878326, 'time_step': 0.0034584108605442278, 'init_value': -0.7057451009750366, 'ave_value': -0.5152650658746023, 'soft_opc': nan} step=664




2022-04-20 15:32.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.57 [info     ] FQE_20220420153254: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012286456234483835, 'time_algorithm_update': 0.003176614462611187, 'loss': 0.004840215762056051, 'time_step': 0.003358042383768472, 'init_value': -0.7850067615509033, 'ave_value': -0.5782604385905706, 'soft_opc': nan} step=830




2022-04-20 15:32.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.58 [info     ] FQE_20220420153254: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001208552395004824, 'time_algorithm_update': 0.003158280648380877, 'loss': 0.004461197785651648, 'time_step': 0.003335754555391978, 'init_value': -0.7982555627822876, 'ave_value': -0.5847097304473455, 'soft_opc': nan} step=996




2022-04-20 15:32.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.59 [info     ] FQE_20220420153254: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00011816943984433829, 'time_algorithm_update': 0.0031447855823011285, 'loss': 0.004318902515862362, 'time_step': 0.0033182738775230317, 'init_value': -0.8315415978431702, 'ave_value': -0.5875626111872964, 'soft_opc': nan} step=1162




2022-04-20 15:32.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.59 [info     ] FQE_20220420153254: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.000124401356800493, 'time_algorithm_update': 0.0032547410712184676, 'loss': 0.00425425893907626, 'time_step': 0.003434059131576354, 'init_value': -0.901964008808136, 'ave_value': -0.621030648012419, 'soft_opc': nan} step=1328




2022-04-20 15:32.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.00 [info     ] FQE_20220420153254: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00012016152761068689, 'time_algorithm_update': 0.0031323691448533393, 'loss': 0.003988590320089765, 'time_step': 0.0033067263752581126, 'init_value': -0.9393982291221619, 'ave_value': -0.6470004262671143, 'soft_opc': nan} step=1494




2022-04-20 15:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.00 [info     ] FQE_20220420153254: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012367748352418463, 'time_algorithm_update': 0.0032418923205639943, 'loss': 0.004269249323497425, 'time_step': 0.003421843770038651, 'init_value': -0.9894454479217529, 'ave_value': -0.6749892281653644, 'soft_opc': nan} step=1660




2022-04-20 15:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.01 [info     ] FQE_20220420153254: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00012987636658082525, 'time_algorithm_update': 0.0031184561281319126, 'loss': 0.004461969842247844, 'time_step': 0.003303309521043157, 'init_value': -1.0812699794769287, 'ave_value': -0.7424902618233417, 'soft_opc': nan} step=1826




2022-04-20 15:33.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.02 [info     ] FQE_20220420153254: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001258749559701207, 'time_algorithm_update': 0.0034569128450140894, 'loss': 0.004611025995670268, 'time_step': 0.0036379386143512034, 'init_value': -1.0871186256408691, 'ave_value': -0.7381572696136999, 'soft_opc': nan} step=1992




2022-04-20 15:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.02 [info     ] FQE_20220420153254: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012153458882527179, 'time_algorithm_update': 0.0030869719493820005, 'loss': 0.005285013734267643, 'time_step': 0.0032622641827686726, 'init_value': -1.1947205066680908, 'ave_value': -0.8058049378228617, 'soft_opc': nan} step=2158




2022-04-20 15:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.03 [info     ] FQE_20220420153254: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012259598237922392, 'time_algorithm_update': 0.0032340101448886365, 'loss': 0.005619984568960695, 'time_step': 0.0034131931971354656, 'init_value': -1.3022783994674683, 'ave_value': -0.9076988368420987, 'soft_opc': nan} step=2324




2022-04-20 15:33.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.04 [info     ] FQE_20220420153254: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012560063097850387, 'time_algorithm_update': 0.00341026897890022, 'loss': 0.006254857413033135, 'time_step': 0.003591010369450213, 'init_value': -1.3372167348861694, 'ave_value': -0.9324001289870556, 'soft_opc': nan} step=2490




2022-04-20 15:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.04 [info     ] FQE_20220420153254: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012401787631482962, 'time_algorithm_update': 0.0032892069184636496, 'loss': 0.0068484968512812746, 'time_step': 0.003472259245723127, 'init_value': -1.450650691986084, 'ave_value': -1.0135809336814123, 'soft_opc': nan} step=2656




2022-04-20 15:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.05 [info     ] FQE_20220420153254: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00012347209884459713, 'time_algorithm_update': 0.0035262840339936406, 'loss': 0.007148286355771573, 'time_step': 0.0037078728158789947, 'init_value': -1.5519459247589111, 'ave_value': -1.0872560951388903, 'soft_opc': nan} step=2822




2022-04-20 15:33.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.06 [info     ] FQE_20220420153254: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012138952691871, 'time_algorithm_update': 0.003617087042475321, 'loss': 0.008087160239691835, 'time_step': 0.003795467227338308, 'init_value': -1.5707899332046509, 'ave_value': -1.0828190879023694, 'soft_opc': nan} step=2988




2022-04-20 15:33.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.06 [info     ] FQE_20220420153254: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012934351541909827, 'time_algorithm_update': 0.0033352647919252694, 'loss': 0.009324204443858555, 'time_step': 0.003525043108377112, 'init_value': -1.5817757844924927, 'ave_value': -1.080584590226781, 'soft_opc': nan} step=3154




2022-04-20 15:33.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.07 [info     ] FQE_20220420153254: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012623402009527367, 'time_algorithm_update': 0.0036160974617463998, 'loss': 0.009893787463745439, 'time_step': 0.003799867917256183, 'init_value': -1.5911434888839722, 'ave_value': -1.0861899423589174, 'soft_opc': nan} step=3320




2022-04-20 15:33.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.08 [info     ] FQE_20220420153254: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00012167390570583113, 'time_algorithm_update': 0.0035338100180568465, 'loss': 0.010833985366471425, 'time_step': 0.003712147115224815, 'init_value': -1.7440611124038696, 'ave_value': -1.2203594645351037, 'soft_opc': nan} step=3486




2022-04-20 15:33.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.08 [info     ] FQE_20220420153254: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00012380531035273908, 'time_algorithm_update': 0.0033656085830136954, 'loss': 0.011752964557058751, 'time_step': 0.0035447313124874987, 'init_value': -1.7717247009277344, 'ave_value': -1.2476926906411914, 'soft_opc': nan} step=3652




2022-04-20 15:33.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.09 [info     ] FQE_20220420153254: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012603868921119045, 'time_algorithm_update': 0.004345710019031203, 'loss': 0.012826902101781354, 'time_step': 0.004529028053743294, 'init_value': -1.8186075687408447, 'ave_value': -1.289265044020103, 'soft_opc': nan} step=3818




2022-04-20 15:33.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.10 [info     ] FQE_20220420153254: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00013245444699942348, 'time_algorithm_update': 0.005191306033766413, 'loss': 0.014013323195121554, 'time_step': 0.005379682563873659, 'init_value': -1.9164884090423584, 'ave_value': -1.3754402033783295, 'soft_opc': nan} step=3984




2022-04-20 15:33.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.11 [info     ] FQE_20220420153254: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00013200058994523013, 'time_algorithm_update': 0.004833409585148455, 'loss': 0.01528118368019406, 'time_step': 0.0050247720925204725, 'init_value': -1.9332382678985596, 'ave_value': -1.4069266015697304, 'soft_opc': nan} step=4150




2022-04-20 15:33.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.12 [info     ] FQE_20220420153254: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012970688831375306, 'time_algorithm_update': 0.004615586924265666, 'loss': 0.016920706199016422, 'time_step': 0.004805685525917145, 'init_value': -1.9748955965042114, 'ave_value': -1.4168237453032924, 'soft_opc': nan} step=4316




2022-04-20 15:33.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.13 [info     ] FQE_20220420153254: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00013299878821315537, 'time_algorithm_update': 0.004914976028074701, 'loss': 0.018423989246156156, 'time_step': 0.005108862038118294, 'init_value': -2.020953893661499, 'ave_value': -1.4510705532925623, 'soft_opc': nan} step=4482




2022-04-20 15:33.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.14 [info     ] FQE_20220420153254: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00013363792235592762, 'time_algorithm_update': 0.004909706402973956, 'loss': 0.019180531436801856, 'time_step': 0.005102621503623135, 'init_value': -1.9837960004806519, 'ave_value': -1.4423128179366793, 'soft_opc': nan} step=4648




2022-04-20 15:33.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.15 [info     ] FQE_20220420153254: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00013261961649699383, 'time_algorithm_update': 0.005072643957942365, 'loss': 0.020469867700783826, 'time_step': 0.005264661398278661, 'init_value': -2.105562210083008, 'ave_value': -1.5548544874465144, 'soft_opc': nan} step=4814




2022-04-20 15:33.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.16 [info     ] FQE_20220420153254: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00013518333435058594, 'time_algorithm_update': 0.004532193563070642, 'loss': 0.021881862183056593, 'time_step': 0.004724582993840596, 'init_value': -2.087103843688965, 'ave_value': -1.5305575926662296, 'soft_opc': nan} step=4980




2022-04-20 15:33.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.16 [info     ] FQE_20220420153254: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00013480703514742563, 'time_algorithm_update': 0.004266563668308488, 'loss': 0.022194798711087017, 'time_step': 0.004461472292980516, 'init_value': -2.1478395462036133, 'ave_value': -1.6215218470205326, 'soft_opc': nan} step=5146




2022-04-20 15:33.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.17 [info     ] FQE_20220420153254: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001347179872443877, 'time_algorithm_update': 0.004850070160555552, 'loss': 0.02464567000223099, 'time_step': 0.005043668919299023, 'init_value': -2.1064345836639404, 'ave_value': -1.5788175275490561, 'soft_opc': nan} step=5312




2022-04-20 15:33.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.18 [info     ] FQE_20220420153254: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00013325587812676486, 'time_algorithm_update': 0.005371358021196112, 'loss': 0.025367678944666655, 'time_step': 0.005569335926009948, 'init_value': -2.0210185050964355, 'ave_value': -1.4770080043137208, 'soft_opc': nan} step=5478




2022-04-20 15:33.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.19 [info     ] FQE_20220420153254: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00013509715896054922, 'time_algorithm_update': 0.005141572779919727, 'loss': 0.027323590513004595, 'time_step': 0.00534013093235981, 'init_value': -2.1039888858795166, 'ave_value': -1.5774687913691139, 'soft_opc': nan} step=5644




2022-04-20 15:33.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.20 [info     ] FQE_20220420153254: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013549787452421993, 'time_algorithm_update': 0.004779706518334079, 'loss': 0.02900922110904547, 'time_step': 0.004972261118601604, 'init_value': -2.206979274749756, 'ave_value': -1.6505486527415882, 'soft_opc': nan} step=5810




2022-04-20 15:33.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.21 [info     ] FQE_20220420153254: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012985051396381423, 'time_algorithm_update': 0.005219739603709026, 'loss': 0.03046806669704257, 'time_step': 0.005408651857490999, 'init_value': -2.262228488922119, 'ave_value': -1.7145609964876696, 'soft_opc': nan} step=5976




2022-04-20 15:33.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.22 [info     ] FQE_20220420153254: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00012938947562711784, 'time_algorithm_update': 0.004804718925292234, 'loss': 0.03220939601586391, 'time_step': 0.00499624229339232, 'init_value': -2.1585888862609863, 'ave_value': -1.63076442283251, 'soft_opc': nan} step=6142




2022-04-20 15:33.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.23 [info     ] FQE_20220420153254: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00013113165476236, 'time_algorithm_update': 0.00503244313849024, 'loss': 0.03333008487313335, 'time_step': 0.005221783396709396, 'init_value': -2.1886672973632812, 'ave_value': -1.670996806224355, 'soft_opc': nan} step=6308




2022-04-20 15:33.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.24 [info     ] FQE_20220420153254: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013276324214705503, 'time_algorithm_update': 0.0041259986808501094, 'loss': 0.035728967018532065, 'time_step': 0.004316055631063071, 'init_value': -2.2177071571350098, 'ave_value': -1.7085540001893098, 'soft_opc': nan} step=6474




2022-04-20 15:33.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.25 [info     ] FQE_20220420153254: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001319934086627271, 'time_algorithm_update': 0.004793024924864252, 'loss': 0.03676929080252906, 'time_step': 0.004981978830084743, 'init_value': -2.268573522567749, 'ave_value': -1.7678268022096908, 'soft_opc': nan} step=6640




2022-04-20 15:33.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.26 [info     ] FQE_20220420153254: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014164936111634037, 'time_algorithm_update': 0.004881114844816277, 'loss': 0.03867006499014503, 'time_step': 0.005081056112266448, 'init_value': -2.2293753623962402, 'ave_value': -1.754563292209898, 'soft_opc': nan} step=6806




2022-04-20 15:33.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.26 [info     ] FQE_20220420153254: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013347418911485788, 'time_algorithm_update': 0.004682278058615075, 'loss': 0.04180401212434812, 'time_step': 0.004871470382414669, 'init_value': -2.199586868286133, 'ave_value': -1.7299431019194156, 'soft_opc': nan} step=6972




2022-04-20 15:33.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.27 [info     ] FQE_20220420153254: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00013533414128315017, 'time_algorithm_update': 0.004831760762685753, 'loss': 0.04262454595405563, 'time_step': 0.005028977451554264, 'init_value': -2.2310519218444824, 'ave_value': -1.7400875572752845, 'soft_opc': nan} step=7138




2022-04-20 15:33.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.28 [info     ] FQE_20220420153254: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00013889031237866506, 'time_algorithm_update': 0.004936654883694936, 'loss': 0.044938167273796285, 'time_step': 0.005137969212359692, 'init_value': -2.322993278503418, 'ave_value': -1.8491542387216746, 'soft_opc': nan} step=7304




2022-04-20 15:33.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.29 [info     ] FQE_20220420153254: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00013955386288194772, 'time_algorithm_update': 0.005084302051957831, 'loss': 0.04638717672436958, 'time_step': 0.005287743476499994, 'init_value': -2.309086322784424, 'ave_value': -1.8136255697741568, 'soft_opc': nan} step=7470




2022-04-20 15:33.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.30 [info     ] FQE_20220420153254: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00013953231903443853, 'time_algorithm_update': 0.004870219403002636, 'loss': 0.049083172932760905, 'time_step': 0.00507772543344153, 'init_value': -2.150874376296997, 'ave_value': -1.6641377212244783, 'soft_opc': nan} step=7636




2022-04-20 15:33.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.31 [info     ] FQE_20220420153254: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001351517367075725, 'time_algorithm_update': 0.0038329779383647873, 'loss': 0.04988753621702094, 'time_step': 0.004031368048794298, 'init_value': -2.2978336811065674, 'ave_value': -1.812103637872549, 'soft_opc': nan} step=7802




2022-04-20 15:33.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.32 [info     ] FQE_20220420153254: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00013370111764195454, 'time_algorithm_update': 0.00484480484422431, 'loss': 0.05096291591615293, 'time_step': 0.005039788154234369, 'init_value': -2.280813455581665, 'ave_value': -1.8031107832199416, 'soft_opc': nan} step=7968




2022-04-20 15:33.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.33 [info     ] FQE_20220420153254: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00013994452465011413, 'time_algorithm_update': 0.005191234220941383, 'loss': 0.054664076754054707, 'time_step': 0.005393477807562035, 'init_value': -2.2664239406585693, 'ave_value': -1.7581649689591146, 'soft_opc': nan} step=8134




2022-04-20 15:33.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:33.34 [info     ] FQE_20220420153254: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001354705856507083, 'time_algorithm_update': 0.004853285938860422, 'loss': 0.056136869537890946, 'time_step': 0.005048117005681417, 'init_value': -2.338618516921997, 'ave_value': -1.8553288918114392, 'soft_opc': nan} step=8300




2022-04-20 15:33.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153254/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:33.34 [debug    ] RoundIterator is selected.
2022-04-20 15:33.34 [info     ] Directory is created at d3rlpy_logs/FQE_20220420153334
2022-04-20 15:33.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:33.34 [debug    ] Building models...
2022-04-20 15:33.34 [debug    ] Models have been built.
2022-04-20 15:33.34 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420153334/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:33.36 [info     ] FQE_20220420153334: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013971536658531012, 'time_algorithm_update': 0.0050207348757011945, 'loss': 0.029810410445057896, 'time_step': 0.005222904127697612, 'init_value': -0.6094425916671753, 'ave_value': -0.6061841454910668, 'soft_opc': nan} step=344




2022-04-20 15:33.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.38 [info     ] FQE_20220420153334: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001650951629461244, 'time_algorithm_update': 0.005354807820431021, 'loss': 0.02503728246327143, 'time_step': 0.005593477986579718, 'init_value': -1.4469796419143677, 'ave_value': -1.4434558520580196, 'soft_opc': nan} step=688




2022-04-20 15:33.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.40 [info     ] FQE_20220420153334: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016047164451244266, 'time_algorithm_update': 0.00458332896232605, 'loss': 0.026717044082795117, 'time_step': 0.00481869661530783, 'init_value': -2.587522506713867, 'ave_value': -2.6147683825310284, 'soft_opc': nan} step=1032




2022-04-20 15:33.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.42 [info     ] FQE_20220420153334: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016673567683197731, 'time_algorithm_update': 0.005380602770073469, 'loss': 0.02822587475841215, 'time_step': 0.005618164012598437, 'init_value': -3.3829855918884277, 'ave_value': -3.4322048617384255, 'soft_opc': nan} step=1376




2022-04-20 15:33.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.44 [info     ] FQE_20220420153334: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001642918863961863, 'time_algorithm_update': 0.005137595326401467, 'loss': 0.03680939490224646, 'time_step': 0.0053776571916979415, 'init_value': -4.303074836730957, 'ave_value': -4.341460397169372, 'soft_opc': nan} step=1720




2022-04-20 15:33.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.46 [info     ] FQE_20220420153334: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016691795615262763, 'time_algorithm_update': 0.005095091670058494, 'loss': 0.0463259753630345, 'time_step': 0.005336857812349187, 'init_value': -5.054810523986816, 'ave_value': -5.0704745742804915, 'soft_opc': nan} step=2064




2022-04-20 15:33.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.48 [info     ] FQE_20220420153334: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001639605954635975, 'time_algorithm_update': 0.004662506802137508, 'loss': 0.05903428060349163, 'time_step': 0.004899881606878236, 'init_value': -6.038015365600586, 'ave_value': -6.051472483371985, 'soft_opc': nan} step=2408




2022-04-20 15:33.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.50 [info     ] FQE_20220420153334: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016117303870445075, 'time_algorithm_update': 0.005200031884880953, 'loss': 0.07165612619326905, 'time_step': 0.005435592906419621, 'init_value': -6.843672752380371, 'ave_value': -6.854871692875115, 'soft_opc': nan} step=2752




2022-04-20 15:33.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.52 [info     ] FQE_20220420153334: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016253077706625295, 'time_algorithm_update': 0.005059979682744935, 'loss': 0.08341504325602897, 'time_step': 0.0052961492261221245, 'init_value': -7.398956298828125, 'ave_value': -7.478531600650754, 'soft_opc': nan} step=3096




2022-04-20 15:33.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.54 [info     ] FQE_20220420153334: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016552625700484876, 'time_algorithm_update': 0.005128867404405461, 'loss': 0.10147953016661801, 'time_step': 0.005369363829146984, 'init_value': -8.339393615722656, 'ave_value': -8.476649319955703, 'soft_opc': nan} step=3440




2022-04-20 15:33.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.56 [info     ] FQE_20220420153334: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001652240753173828, 'time_algorithm_update': 0.005225998024607814, 'loss': 0.11431201717675511, 'time_step': 0.005465176909468895, 'init_value': -8.875179290771484, 'ave_value': -9.105278303692964, 'soft_opc': nan} step=3784




2022-04-20 15:33.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.58 [info     ] FQE_20220420153334: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016390722851420558, 'time_algorithm_update': 0.00463059198024661, 'loss': 0.13016060376431532, 'time_step': 0.004868870557740677, 'init_value': -9.518592834472656, 'ave_value': -9.810072540279755, 'soft_opc': nan} step=4128




2022-04-20 15:33.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.00 [info     ] FQE_20220420153334: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016562536705371945, 'time_algorithm_update': 0.005118126786032388, 'loss': 0.14553654147312045, 'time_step': 0.005359554706617843, 'init_value': -9.92832088470459, 'ave_value': -10.324697108428925, 'soft_opc': nan} step=4472




2022-04-20 15:34.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.02 [info     ] FQE_20220420153334: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016240117161772972, 'time_algorithm_update': 0.005157579516255578, 'loss': 0.16362076349079954, 'time_step': 0.005391070316004199, 'init_value': -10.662982940673828, 'ave_value': -11.225432246095902, 'soft_opc': nan} step=4816




2022-04-20 15:34.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.04 [info     ] FQE_20220420153334: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001652718976486561, 'time_algorithm_update': 0.005134994207426559, 'loss': 0.1794799803810324, 'time_step': 0.005372086929720502, 'init_value': -11.071022987365723, 'ave_value': -11.763978336898502, 'soft_opc': nan} step=5160




2022-04-20 15:34.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.05 [info     ] FQE_20220420153334: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001611744248589804, 'time_algorithm_update': 0.004671124524848406, 'loss': 0.19384421299341634, 'time_step': 0.0049020717310351, 'init_value': -11.59962272644043, 'ave_value': -12.5278579178267, 'soft_opc': nan} step=5504




2022-04-20 15:34.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.07 [info     ] FQE_20220420153334: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016457188961117766, 'time_algorithm_update': 0.005066884811534438, 'loss': 0.21064127878242628, 'time_step': 0.005304763483446698, 'init_value': -12.114143371582031, 'ave_value': -13.296073771247153, 'soft_opc': nan} step=5848




2022-04-20 15:34.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.09 [info     ] FQE_20220420153334: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016522754070370696, 'time_algorithm_update': 0.005092536294183066, 'loss': 0.22915496574281607, 'time_step': 0.005330295756805775, 'init_value': -12.541034698486328, 'ave_value': -13.966354609840877, 'soft_opc': nan} step=6192




2022-04-20 15:34.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.11 [info     ] FQE_20220420153334: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001693652119747428, 'time_algorithm_update': 0.005184300417123839, 'loss': 0.2454331595948789, 'time_step': 0.0054302638353303425, 'init_value': -13.062126159667969, 'ave_value': -14.760094884228431, 'soft_opc': nan} step=6536




2022-04-20 15:34.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.13 [info     ] FQE_20220420153334: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016633438509564068, 'time_algorithm_update': 0.005164897026017655, 'loss': 0.25898010665442533, 'time_step': 0.005404334428698518, 'init_value': -13.41265869140625, 'ave_value': -15.326217877600124, 'soft_opc': nan} step=6880




2022-04-20 15:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.15 [info     ] FQE_20220420153334: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001620927522348803, 'time_algorithm_update': 0.004876703716987787, 'loss': 0.2734854928391098, 'time_step': 0.005110604818477187, 'init_value': -13.832643508911133, 'ave_value': -16.01749760087507, 'soft_opc': nan} step=7224




2022-04-20 15:34.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.17 [info     ] FQE_20220420153334: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016516308451807775, 'time_algorithm_update': 0.005164716132851534, 'loss': 0.2925772210907971, 'time_step': 0.0054045444311097614, 'init_value': -13.900123596191406, 'ave_value': -16.295973252301703, 'soft_opc': nan} step=7568




2022-04-20 15:34.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.19 [info     ] FQE_20220420153334: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016430852025054222, 'time_algorithm_update': 0.00508909170017686, 'loss': 0.30178399334748296, 'time_step': 0.005326363236405129, 'init_value': -14.484825134277344, 'ave_value': -17.030843308840712, 'soft_opc': nan} step=7912




2022-04-20 15:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.21 [info     ] FQE_20220420153334: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016614309577054755, 'time_algorithm_update': 0.005179225012313488, 'loss': 0.3195035679612395, 'time_step': 0.005419038755949153, 'init_value': -14.89448356628418, 'ave_value': -17.627862004286282, 'soft_opc': nan} step=8256




2022-04-20 15:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.23 [info     ] FQE_20220420153334: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015870152517806653, 'time_algorithm_update': 0.004638906828192777, 'loss': 0.33497553180123485, 'time_step': 0.004867991735768872, 'init_value': -15.213602066040039, 'ave_value': -18.06539718672756, 'soft_opc': nan} step=8600




2022-04-20 15:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.25 [info     ] FQE_20220420153334: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016428079715994902, 'time_algorithm_update': 0.005160304002983626, 'loss': 0.35475400610025537, 'time_step': 0.0054001953712729525, 'init_value': -15.814719200134277, 'ave_value': -18.68471659980481, 'soft_opc': nan} step=8944




2022-04-20 15:34.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.27 [info     ] FQE_20220420153334: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016458990962006325, 'time_algorithm_update': 0.005187841348869856, 'loss': 0.3654844656824892, 'time_step': 0.005424467630164568, 'init_value': -16.20187759399414, 'ave_value': -19.13554679324886, 'soft_opc': nan} step=9288




2022-04-20 15:34.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.29 [info     ] FQE_20220420153334: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001666732998781426, 'time_algorithm_update': 0.0051685488501260445, 'loss': 0.3782346598305848, 'time_step': 0.005408646062363026, 'init_value': -16.922367095947266, 'ave_value': -19.980580888238215, 'soft_opc': nan} step=9632




2022-04-20 15:34.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.31 [info     ] FQE_20220420153334: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016665597294652186, 'time_algorithm_update': 0.0048245323258777, 'loss': 0.38511502736269737, 'time_step': 0.005063612793767175, 'init_value': -17.172027587890625, 'ave_value': -20.218866740178836, 'soft_opc': nan} step=9976




2022-04-20 15:34.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.33 [info     ] FQE_20220420153334: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016399247701777967, 'time_algorithm_update': 0.005178265793378963, 'loss': 0.393379396087555, 'time_step': 0.005415183167124904, 'init_value': -17.87435531616211, 'ave_value': -20.904643688220236, 'soft_opc': nan} step=10320




2022-04-20 15:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.35 [info     ] FQE_20220420153334: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016379772230636243, 'time_algorithm_update': 0.005084140356196914, 'loss': 0.4059016288009061, 'time_step': 0.005323762810507486, 'init_value': -18.250629425048828, 'ave_value': -21.20511010714496, 'soft_opc': nan} step=10664




2022-04-20 15:34.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.37 [info     ] FQE_20220420153334: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001634220744288245, 'time_algorithm_update': 0.005128246407176173, 'loss': 0.42075682787744456, 'time_step': 0.005366850037907445, 'init_value': -18.432758331298828, 'ave_value': -21.294405503170605, 'soft_opc': nan} step=11008




2022-04-20 15:34.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.39 [info     ] FQE_20220420153334: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016600309416305188, 'time_algorithm_update': 0.005139339108799779, 'loss': 0.4225887598329072, 'time_step': 0.0053806304931640625, 'init_value': -18.76958465576172, 'ave_value': -21.66672232122473, 'soft_opc': nan} step=11352




2022-04-20 15:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.41 [info     ] FQE_20220420153334: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015948470248732457, 'time_algorithm_update': 0.004588332287100858, 'loss': 0.43359916010698263, 'time_step': 0.004821960316147915, 'init_value': -19.216703414916992, 'ave_value': -22.0466792702914, 'soft_opc': nan} step=11696




2022-04-20 15:34.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.43 [info     ] FQE_20220420153334: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016306375348290733, 'time_algorithm_update': 0.005173814851184224, 'loss': 0.4376219153934873, 'time_step': 0.00540985201680383, 'init_value': -18.60982894897461, 'ave_value': -21.477911308690537, 'soft_opc': nan} step=12040




2022-04-20 15:34.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.45 [info     ] FQE_20220420153334: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016428426254627316, 'time_algorithm_update': 0.005076603140941886, 'loss': 0.4304731500085963, 'time_step': 0.005314720924510513, 'init_value': -18.89316177368164, 'ave_value': -21.721947456983038, 'soft_opc': nan} step=12384




2022-04-20 15:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.47 [info     ] FQE_20220420153334: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016333543977072073, 'time_algorithm_update': 0.005165836145711499, 'loss': 0.4383490263852616, 'time_step': 0.00540328303048777, 'init_value': -18.886539459228516, 'ave_value': -21.651952430781247, 'soft_opc': nan} step=12728




2022-04-20 15:34.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.49 [info     ] FQE_20220420153334: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016257859939752624, 'time_algorithm_update': 0.004615780226019926, 'loss': 0.436601145869814, 'time_step': 0.004852320565733799, 'init_value': -19.1827449798584, 'ave_value': -21.97088762536883, 'soft_opc': nan} step=13072




2022-04-20 15:34.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.51 [info     ] FQE_20220420153334: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016230275464612385, 'time_algorithm_update': 0.0051240297250969465, 'loss': 0.452694064417724, 'time_step': 0.005360095306884411, 'init_value': -19.467763900756836, 'ave_value': -22.296272307055425, 'soft_opc': nan} step=13416




2022-04-20 15:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.53 [info     ] FQE_20220420153334: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016596289568169173, 'time_algorithm_update': 0.005207223947658096, 'loss': 0.44709133438571075, 'time_step': 0.0054467992727146594, 'init_value': -19.697296142578125, 'ave_value': -22.539267127611883, 'soft_opc': nan} step=13760




2022-04-20 15:34.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.55 [info     ] FQE_20220420153334: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016420317250628804, 'time_algorithm_update': 0.005140647638675778, 'loss': 0.453102933876433, 'time_step': 0.0053779933341713835, 'init_value': -19.584585189819336, 'ave_value': -22.49650067905644, 'soft_opc': nan} step=14104




2022-04-20 15:34.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.57 [info     ] FQE_20220420153334: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016507506370544434, 'time_algorithm_update': 0.005190025235331336, 'loss': 0.4460508761396848, 'time_step': 0.0054295596688292746, 'init_value': -19.75185203552246, 'ave_value': -22.66621955257656, 'soft_opc': nan} step=14448




2022-04-20 15:34.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:34.59 [info     ] FQE_20220420153334: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001649613990340122, 'time_algorithm_update': 0.0047773316849109735, 'loss': 0.44140039804543174, 'time_step': 0.005015418280002682, 'init_value': -19.64680290222168, 'ave_value': -22.634242844338075, 'soft_opc': nan} step=14792




2022-04-20 15:34.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.01 [info     ] FQE_20220420153334: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016636418741802837, 'time_algorithm_update': 0.00523329543512921, 'loss': 0.4439780849930939, 'time_step': 0.005472974721775498, 'init_value': -19.14394760131836, 'ave_value': -22.298240976757263, 'soft_opc': nan} step=15136




2022-04-20 15:35.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.03 [info     ] FQE_20220420153334: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016521159992661586, 'time_algorithm_update': 0.005166919425476429, 'loss': 0.4326088586064099, 'time_step': 0.0054062792035036306, 'init_value': -19.178173065185547, 'ave_value': -22.534858301919854, 'soft_opc': nan} step=15480




2022-04-20 15:35.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.05 [info     ] FQE_20220420153334: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001647451589273852, 'time_algorithm_update': 0.005154252745384394, 'loss': 0.4309156818022995, 'time_step': 0.005394604316977567, 'init_value': -19.451217651367188, 'ave_value': -22.736459686311782, 'soft_opc': nan} step=15824




2022-04-20 15:35.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.06 [info     ] FQE_20220420153334: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016315108121827592, 'time_algorithm_update': 0.004695112622061441, 'loss': 0.4307422609837336, 'time_step': 0.00493083443752555, 'init_value': -19.471752166748047, 'ave_value': -22.89256951323037, 'soft_opc': nan} step=16168




2022-04-20 15:35.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.08 [info     ] FQE_20220420153334: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016436743181805278, 'time_algorithm_update': 0.005089779232823571, 'loss': 0.4266237342177869, 'time_step': 0.005329958921255067, 'init_value': -19.479406356811523, 'ave_value': -22.968888051184273, 'soft_opc': nan} step=16512




2022-04-20 15:35.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.10 [info     ] FQE_20220420153334: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001669131046117738, 'time_algorithm_update': 0.005123408727867659, 'loss': 0.4330655939573812, 'time_step': 0.005365623984225961, 'init_value': -20.009540557861328, 'ave_value': -23.58857382076461, 'soft_opc': nan} step=16856




2022-04-20 15:35.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:35.12 [info     ] FQE_20220420153334: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017085463501686273, 'time_algorithm_update': 0.005117637473483419, 'loss': 0.4408096813131124, 'time_step': 0.005364169908124347, 'init_value': -19.993314743041992, 'ave_value': -23.63197533090747, 'soft_opc': nan} step=17200




2022-04-20 15:35.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153334/model_17200.pt
search iteration:  2
using hyper params:  [0.002236147015763379, 0.000326424809205375, 1.8201579745027705e-05, 3]
2022-04-20 15:35.12 [debug    ] RoundIterator is selected.
2022-04-20 15:35.12 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420153512
2022-04-20 15:35.12 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:35.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:35.13 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:35.13 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00223614701576

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.16 [info     ] TD3PlusBC_20220420153512: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00035174896842554996, 'time_algorithm_update': 0.008438294393974439, 'critic_loss': 14.356809413223935, 'actor_loss': 2.6417757387746845, 'time_step': 0.00886983062788757, 'td_error': 0.8747323947205649, 'init_value': -3.8693366050720215, 'ave_value': -2.155285839904813} step=342
2022-04-20 15:35.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.20 [info     ] TD3PlusBC_20220420153512: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000347209952728093, 'time_algorithm_update': 0.008917973752607378, 'critic_loss': 1.9959936312764708, 'actor_loss': 2.415673108128776, 'time_step': 0.009340660613879823, 'td_error': 0.8914880039632421, 'init_value': -5.8719682693481445, 'ave_value': -3.369154015689006} step=684
2022-04-20 15:35.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.24 [info     ] TD3PlusBC_20220420153512: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003483630063240988, 'time_algorithm_update': 0.008872601023891516, 'critic_loss': 2.180099933990958, 'actor_loss': 2.3697589126943845, 'time_step': 0.00929274963356598, 'td_error': 0.9482237885845081, 'init_value': -8.057183265686035, 'ave_value': -4.630839557319686} step=1026
2022-04-20 15:35.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.27 [info     ] TD3PlusBC_20220420153512: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035210589916385407, 'time_algorithm_update': 0.008647089813187805, 'critic_loss': 2.453348587479508, 'actor_loss': 2.35756876733568, 'time_step': 0.009072637697409468, 'td_error': 1.0403733319648456, 'init_value': -10.154902458190918, 'ave_value': -5.861942854049607} step=1368
2022-04-20 15:35.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.31 [info     ] TD3PlusBC_20220420153512: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003487659476653874, 'time_algorithm_update': 0.008886462066605774, 'critic_loss': 2.8343709844943374, 'actor_loss': 2.346762056239167, 'time_step': 0.009309891371699104, 'td_error': 1.1677486168653426, 'init_value': -12.388049125671387, 'ave_value': -7.148042775462717} step=1710
2022-04-20 15:35.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.35 [info     ] TD3PlusBC_20220420153512: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035342696117378813, 'time_algorithm_update': 0.008562072675827651, 'critic_loss': 3.285574461981567, 'actor_loss': 2.3377104488729734, 'time_step': 0.00899022294763933, 'td_error': 1.3262556870829096, 'init_value': -14.583842277526855, 'ave_value': -8.421292111236552} step=2052
2022-04-20 15:35.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.38 [info     ] TD3PlusBC_20220420153512: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003536319174961737, 'time_algorithm_update': 0.008832107510483056, 'critic_loss': 3.774164909856361, 'actor_loss': 2.3339030129170557, 'time_step': 0.00926198945407979, 'td_error': 1.5121409505039094, 'init_value': -16.759187698364258, 'ave_value': -9.658658837069408} step=2394
2022-04-20 15:35.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.42 [info     ] TD3PlusBC_20220420153512: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00035122612066436235, 'time_algorithm_update': 0.008938492390147428, 'critic_loss': 4.269202816730354, 'actor_loss': 2.3313623492480717, 'time_step': 0.009365332754034745, 'td_error': 1.7223685962141824, 'init_value': -18.713672637939453, 'ave_value': -10.765264720558609} step=2736
2022-04-20 15:35.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.46 [info     ] TD3PlusBC_20220420153512: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034827725929126405, 'time_algorithm_update': 0.00849872304682146, 'critic_loss': 4.794995822927408, 'actor_loss': 2.3242090124832955, 'time_step': 0.008921409908093904, 'td_error': 1.9690981683990056, 'init_value': -21.06081771850586, 'ave_value': -12.127334077410307} step=3078
2022-04-20 15:35.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.49 [info     ] TD3PlusBC_20220420153512: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035333006005538136, 'time_algorithm_update': 0.00895717966626262, 'critic_loss': 5.289771223800225, 'actor_loss': 2.326727249468976, 'time_step': 0.00938805571773596, 'td_error': 2.2257434404903584, 'init_value': -23.021108627319336, 'ave_value': -13.226370288312964} step=3420
2022-04-20 15:35.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.53 [info     ] TD3PlusBC_20220420153512: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003501372030603955, 'time_algorithm_update': 0.008456004990471734, 'critic_loss': 5.812468083978397, 'actor_loss': 2.3241570191076626, 'time_step': 0.008880123060349136, 'td_error': 2.5153727152964067, 'init_value': -25.03714370727539, 'ave_value': -14.37124240667899} step=3762
2022-04-20 15:35.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.57 [info     ] TD3PlusBC_20220420153512: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003563507258543494, 'time_algorithm_update': 0.009226711172806589, 'critic_loss': 6.291905522172214, 'actor_loss': 2.325141445237991, 'time_step': 0.009661476514492815, 'td_error': 2.827173795332772, 'init_value': -27.01275634765625, 'ave_value': -15.543584252669191} step=4104
2022-04-20 15:35.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.01 [info     ] TD3PlusBC_20220420153512: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035198390135291027, 'time_algorithm_update': 0.009014021583467896, 'critic_loss': 6.801792648277785, 'actor_loss': 2.321029131872612, 'time_step': 0.00945031782339888, 'td_error': 3.1431365109565927, 'init_value': -28.955514907836914, 'ave_value': -16.644882724348513} step=4446
2022-04-20 15:36.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.04 [info     ] TD3PlusBC_20220420153512: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035334121414095336, 'time_algorithm_update': 0.00858345505786918, 'critic_loss': 7.264532267001638, 'actor_loss': 2.3231382579134223, 'time_step': 0.009016711809481794, 'td_error': 3.493928582343013, 'init_value': -30.8087158203125, 'ave_value': -17.709771453759565} step=4788
2022-04-20 15:36.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.08 [info     ] TD3PlusBC_20220420153512: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035953800580654926, 'time_algorithm_update': 0.009044433197779962, 'critic_loss': 7.747972034222898, 'actor_loss': 2.319448929781105, 'time_step': 0.009483112229241265, 'td_error': 3.838141560138792, 'init_value': -32.768150329589844, 'ave_value': -18.816031215924365} step=5130
2022-04-20 15:36.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.12 [info     ] TD3PlusBC_20220420153512: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003562670702125594, 'time_algorithm_update': 0.00867723930648893, 'critic_loss': 8.177121195528242, 'actor_loss': 2.320101417296114, 'time_step': 0.009112382492823907, 'td_error': 4.185033148128634, 'init_value': -34.528900146484375, 'ave_value': -19.856127983398046} step=5472
2022-04-20 15:36.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.16 [info     ] TD3PlusBC_20220420153512: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003577087357727408, 'time_algorithm_update': 0.009133756509301259, 'critic_loss': 8.67623983523999, 'actor_loss': 2.322940967236346, 'time_step': 0.009572297508953607, 'td_error': 4.552194692765571, 'init_value': -36.248313903808594, 'ave_value': -20.83718454261214} step=5814
2022-04-20 15:36.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.19 [info     ] TD3PlusBC_20220420153512: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00035046833997581437, 'time_algorithm_update': 0.009094579875120643, 'critic_loss': 9.12036198081329, 'actor_loss': 2.317916067023026, 'time_step': 0.009523558337786044, 'td_error': 4.927927305342568, 'init_value': -37.91626739501953, 'ave_value': -21.812393117446266} step=6156
2022-04-20 15:36.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.23 [info     ] TD3PlusBC_20220420153512: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035801129034388135, 'time_algorithm_update': 0.008696526811833968, 'critic_loss': 9.58022774311534, 'actor_loss': 2.3173448607238414, 'time_step': 0.009136082833273369, 'td_error': 5.316846392230085, 'init_value': -39.53913116455078, 'ave_value': -22.745528542609247} step=6498
2022-04-20 15:36.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.27 [info     ] TD3PlusBC_20220420153512: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035591362512599656, 'time_algorithm_update': 0.00896345523365757, 'critic_loss': 10.020910959843306, 'actor_loss': 2.318527054368404, 'time_step': 0.009398358607152749, 'td_error': 5.69478220609513, 'init_value': -41.139503479003906, 'ave_value': -23.67877912157534} step=6840
2022-04-20 15:36.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.30 [info     ] TD3PlusBC_20220420153512: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00034977260388826067, 'time_algorithm_update': 0.008542636681718436, 'critic_loss': 10.468472642508166, 'actor_loss': 2.3186517166115386, 'time_step': 0.008973591508921127, 'td_error': 6.093591878930036, 'init_value': -42.775577545166016, 'ave_value': -24.63932759808049} step=7182
2022-04-20 15:36.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.34 [info     ] TD3PlusBC_20220420153512: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003582720170941269, 'time_algorithm_update': 0.009055152274014657, 'critic_loss': 10.876815580136595, 'actor_loss': 2.3171561363844844, 'time_step': 0.009486669685408386, 'td_error': 6.490328492735568, 'init_value': -44.300533294677734, 'ave_value': -25.514750018623356} step=7524
2022-04-20 15:36.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.38 [info     ] TD3PlusBC_20220420153512: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003541917131658186, 'time_algorithm_update': 0.008991752451623392, 'critic_loss': 11.277465851334801, 'actor_loss': 2.3203706713447794, 'time_step': 0.009419859501353482, 'td_error': 6.867581392807096, 'init_value': -45.653018951416016, 'ave_value': -26.277963189082513} step=7866
2022-04-20 15:36.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.42 [info     ] TD3PlusBC_20220420153512: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035128119396187407, 'time_algorithm_update': 0.008604261610243056, 'critic_loss': 11.631288799974653, 'actor_loss': 2.3175182635324045, 'time_step': 0.009033343248199998, 'td_error': 7.238398030843218, 'init_value': -47.07406234741211, 'ave_value': -27.139382455297202} step=8208
2022-04-20 15:36.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.45 [info     ] TD3PlusBC_20220420153512: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003559003796493798, 'time_algorithm_update': 0.008999038160892954, 'critic_loss': 12.05881991233045, 'actor_loss': 2.3170572149823285, 'time_step': 0.009428777889898645, 'td_error': 7.669209639109877, 'init_value': -48.51952362060547, 'ave_value': -27.93400038092817} step=8550
2022-04-20 15:36.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.49 [info     ] TD3PlusBC_20220420153512: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035811028285333285, 'time_algorithm_update': 0.008730124311837537, 'critic_loss': 12.500593295919964, 'actor_loss': 2.319916354285346, 'time_step': 0.009165693444815295, 'td_error': 8.037694451992035, 'init_value': -49.760459899902344, 'ave_value': -28.671940302582048} step=8892
2022-04-20 15:36.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.53 [info     ] TD3PlusBC_20220420153512: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003617318750124926, 'time_algorithm_update': 0.008951523847747268, 'critic_loss': 12.894874245102642, 'actor_loss': 2.3187777661440667, 'time_step': 0.009394990770440353, 'td_error': 8.447440125190845, 'init_value': -51.211036682128906, 'ave_value': -29.53817810558974} step=9234
2022-04-20 15:36.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.57 [info     ] TD3PlusBC_20220420153512: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003570109082941423, 'time_algorithm_update': 0.008979303097864341, 'critic_loss': 13.264059222929658, 'actor_loss': 2.31456254379094, 'time_step': 0.009414914058662995, 'td_error': 8.848111622546767, 'init_value': -52.536216735839844, 'ave_value': -30.288159703787937} step=9576
2022-04-20 15:36.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.00 [info     ] TD3PlusBC_20220420153512: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00036006015643738864, 'time_algorithm_update': 0.008707742942006965, 'critic_loss': 13.661658794210668, 'actor_loss': 2.3201831655892713, 'time_step': 0.009140382036131028, 'td_error': 9.236921657867667, 'init_value': -53.8315544128418, 'ave_value': -31.033000317566383} step=9918
2022-04-20 15:37.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.04 [info     ] TD3PlusBC_20220420153512: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003528678626344915, 'time_algorithm_update': 0.00892189720220733, 'critic_loss': 14.056697415678125, 'actor_loss': 2.3187014554676257, 'time_step': 0.009352619885004054, 'td_error': 9.677300356386038, 'init_value': -55.131858825683594, 'ave_value': -31.823269637970856} step=10260
2022-04-20 15:37.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.08 [info     ] TD3PlusBC_20220420153512: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003544154920076069, 'time_algorithm_update': 0.008563505975823654, 'critic_loss': 14.42672346000783, 'actor_loss': 2.321705012293587, 'time_step': 0.008993619366696006, 'td_error': 10.057433124319846, 'init_value': -56.306251525878906, 'ave_value': -32.48993166684391} step=10602
2022-04-20 15:37.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.11 [info     ] TD3PlusBC_20220420153512: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035351898237975717, 'time_algorithm_update': 0.008980460334242437, 'critic_loss': 14.747994229110361, 'actor_loss': 2.318090858515243, 'time_step': 0.009409993015534697, 'td_error': 10.399397975809688, 'init_value': -57.4406623840332, 'ave_value': -33.1490593434215} step=10944
2022-04-20 15:37.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.15 [info     ] TD3PlusBC_20220420153512: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003544140977469104, 'time_algorithm_update': 0.008931714888901739, 'critic_loss': 15.20932023845918, 'actor_loss': 2.317040807322452, 'time_step': 0.009361504114162155, 'td_error': 10.803175161155046, 'init_value': -58.55060958862305, 'ave_value': -33.797267794936246} step=11286
2022-04-20 15:37.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.19 [info     ] TD3PlusBC_20220420153512: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003560112233747516, 'time_algorithm_update': 0.008592788936101903, 'critic_loss': 15.58771680461036, 'actor_loss': 2.3200520674387612, 'time_step': 0.00902229024652849, 'td_error': 11.144191246631582, 'init_value': -59.693397521972656, 'ave_value': -34.44759755239453} step=11628
2022-04-20 15:37.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.23 [info     ] TD3PlusBC_20220420153512: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003550763715777481, 'time_algorithm_update': 0.009107418227614019, 'critic_loss': 15.954409523316992, 'actor_loss': 2.317138676057782, 'time_step': 0.009537843235752038, 'td_error': 11.586456713289959, 'init_value': -60.89323043823242, 'ave_value': -35.181201220816} step=11970
2022-04-20 15:37.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.26 [info     ] TD3PlusBC_20220420153512: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035324779867428784, 'time_algorithm_update': 0.0087107169000726, 'critic_loss': 16.27878052240227, 'actor_loss': 2.3179403494673165, 'time_step': 0.009140103881122076, 'td_error': 11.852133116139255, 'init_value': -61.61322784423828, 'ave_value': -35.603379552708184} step=12312
2022-04-20 15:37.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.30 [info     ] TD3PlusBC_20220420153512: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003577240726404023, 'time_algorithm_update': 0.00904870102977195, 'critic_loss': 16.598799909764562, 'actor_loss': 2.3202778777183846, 'time_step': 0.009485507569117852, 'td_error': 12.246852961987425, 'init_value': -62.881797790527344, 'ave_value': -36.37478854294237} step=12654
2022-04-20 15:37.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.34 [info     ] TD3PlusBC_20220420153512: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035713360323543435, 'time_algorithm_update': 0.00912387538374516, 'critic_loss': 16.994686052116037, 'actor_loss': 2.323805588727806, 'time_step': 0.009556598133511014, 'td_error': 12.579628000315598, 'init_value': -63.764617919921875, 'ave_value': -36.827075189749486} step=12996
2022-04-20 15:37.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.37 [info     ] TD3PlusBC_20220420153512: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003511828985827708, 'time_algorithm_update': 0.00854063103770652, 'critic_loss': 17.374996027054145, 'actor_loss': 2.320517556709156, 'time_step': 0.008972306697689302, 'td_error': 12.981810686298287, 'init_value': -64.82905578613281, 'ave_value': -37.49767352189491} step=13338
2022-04-20 15:37.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.41 [info     ] TD3PlusBC_20220420153512: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003536723510563722, 'time_algorithm_update': 0.009066212944119995, 'critic_loss': 17.740700210744176, 'actor_loss': 2.319282753425732, 'time_step': 0.009495553217436137, 'td_error': 13.3672011074624, 'init_value': -65.90770721435547, 'ave_value': -38.158601963592126} step=13680
2022-04-20 15:37.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.45 [info     ] TD3PlusBC_20220420153512: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035464136224043995, 'time_algorithm_update': 0.008782609861496597, 'critic_loss': 17.987254494114925, 'actor_loss': 2.3230661690583703, 'time_step': 0.009216251429061444, 'td_error': 13.662330658033879, 'init_value': -66.71212005615234, 'ave_value': -38.605335344454225} step=14022
2022-04-20 15:37.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.49 [info     ] TD3PlusBC_20220420153512: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003552297402543631, 'time_algorithm_update': 0.009059082694918091, 'critic_loss': 18.445271713691845, 'actor_loss': 2.3173492317311246, 'time_step': 0.009493040062530695, 'td_error': 13.978802737147905, 'init_value': -67.53013610839844, 'ave_value': -39.13981327923468} step=14364
2022-04-20 15:37.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.52 [info     ] TD3PlusBC_20220420153512: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035254369702255515, 'time_algorithm_update': 0.008944055490326463, 'critic_loss': 18.745711576868917, 'actor_loss': 2.3211739997417604, 'time_step': 0.00937046223913717, 'td_error': 14.288273937172908, 'init_value': -68.27457427978516, 'ave_value': -39.56229682447649} step=14706
2022-04-20 15:37.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.56 [info     ] TD3PlusBC_20220420153512: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003558571575677883, 'time_algorithm_update': 0.008712765069035759, 'critic_loss': 19.058979493832727, 'actor_loss': 2.3220580962666295, 'time_step': 0.009147283626578705, 'td_error': 14.64227500789351, 'init_value': -69.28328704833984, 'ave_value': -40.14389817066122} step=15048
2022-04-20 15:37.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.00 [info     ] TD3PlusBC_20220420153512: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00035755048718368797, 'time_algorithm_update': 0.009053929507383826, 'critic_loss': 19.503430878209787, 'actor_loss': 2.32322541314956, 'time_step': 0.009490128846196403, 'td_error': 14.956886711074292, 'init_value': -70.16619873046875, 'ave_value': -40.67438303798278} step=15390
2022-04-20 15:38.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.03 [info     ] TD3PlusBC_20220420153512: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003558697059140568, 'time_algorithm_update': 0.008868123355664705, 'critic_loss': 19.810333155052007, 'actor_loss': 2.3220119030154938, 'time_step': 0.009304866456148917, 'td_error': 15.270450680487436, 'init_value': -70.79344940185547, 'ave_value': -41.054019310713684} step=15732
2022-04-20 15:38.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.07 [info     ] TD3PlusBC_20220420153512: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003549697106344658, 'time_algorithm_update': 0.009069716024119951, 'critic_loss': 20.185559395461055, 'actor_loss': 2.3265349084173725, 'time_step': 0.009500073410614191, 'td_error': 15.555189230087395, 'init_value': -71.62496185302734, 'ave_value': -41.55102608772297} step=16074
2022-04-20 15:38.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.11 [info     ] TD3PlusBC_20220420153512: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003286683768556829, 'time_algorithm_update': 0.008693690885577286, 'critic_loss': 20.57245014843188, 'actor_loss': 2.3257367666701825, 'time_step': 0.009091968424836097, 'td_error': 15.823649028316966, 'init_value': -72.24647521972656, 'ave_value': -41.88967669975163} step=16416
2022-04-20 15:38.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.14 [info     ] TD3PlusBC_20220420153512: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003060137319285967, 'time_algorithm_update': 0.00803048987137644, 'critic_loss': 20.922623860208613, 'actor_loss': 2.324611953824584, 'time_step': 0.008403776681911179, 'td_error': 16.116896228938728, 'init_value': -73.11408233642578, 'ave_value': -42.398369990068296} step=16758
2022-04-20 15:38.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.18 [info     ] TD3PlusBC_20220420153512: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003488238094842922, 'time_algorithm_update': 0.00903438266954924, 'critic_loss': 21.297682459591424, 'actor_loss': 2.3235118110277497, 'time_step': 0.009459434894093295, 'td_error': 16.454894412781954, 'init_value': -74.01480865478516, 'ave_value': -42.998720031872494} step=17100
2022-04-20 15:38.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420153512/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:38.19 [info     ] FQE_20220420153818: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001632046986775226, 'time_algorithm_update': 0.005211363355797458, 'loss': 0.007899302515167609, 'time_step': 0.005447493978293545, 'init_value': -0.37000343203544617, 'ave_value': -0.2928759809331717, 'soft_opc': nan} step=166




2022-04-20 15:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.20 [info     ] FQE_20220420153818: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001644844032195677, 'time_algorithm_update': 0.005045833357845445, 'loss': 0.005733438681934522, 'time_step': 0.005284589457224651, 'init_value': -0.4978027045726776, 'ave_value': -0.36198239967327667, 'soft_opc': nan} step=332




2022-04-20 15:38.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.21 [info     ] FQE_20220420153818: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016249806047922158, 'time_algorithm_update': 0.0048028489193284365, 'loss': 0.00520153079709285, 'time_step': 0.005036881171077131, 'init_value': -0.5499684810638428, 'ave_value': -0.3804317536877116, 'soft_opc': nan} step=498




2022-04-20 15:38.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.22 [info     ] FQE_20220420153818: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016285999711737576, 'time_algorithm_update': 0.004879023655351386, 'loss': 0.005049408848274006, 'time_step': 0.005119455866066806, 'init_value': -0.6177024245262146, 'ave_value': -0.44012070461386094, 'soft_opc': nan} step=664




2022-04-20 15:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.23 [info     ] FQE_20220420153818: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016127293368419968, 'time_algorithm_update': 0.005247170666614211, 'loss': 0.004606932118333068, 'time_step': 0.0054834348609648555, 'init_value': -0.5975911617279053, 'ave_value': -0.40698983986559956, 'soft_opc': nan} step=830




2022-04-20 15:38.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.24 [info     ] FQE_20220420153818: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016380792640777956, 'time_algorithm_update': 0.005146958741797022, 'loss': 0.004208997175299439, 'time_step': 0.005387635116117546, 'init_value': -0.6043626070022583, 'ave_value': -0.41780825714236713, 'soft_opc': nan} step=996




2022-04-20 15:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.25 [info     ] FQE_20220420153818: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015814476702586715, 'time_algorithm_update': 0.005106798137526914, 'loss': 0.004337878163528056, 'time_step': 0.005336645137832825, 'init_value': -0.6077103614807129, 'ave_value': -0.4240732938000882, 'soft_opc': nan} step=1162




2022-04-20 15:38.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.26 [info     ] FQE_20220420153818: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016507183212831797, 'time_algorithm_update': 0.0050966696566846, 'loss': 0.004228880659800516, 'time_step': 0.005338006708995405, 'init_value': -0.6383702754974365, 'ave_value': -0.4651270920150653, 'soft_opc': nan} step=1328




2022-04-20 15:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.27 [info     ] FQE_20220420153818: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016632281154035087, 'time_algorithm_update': 0.005108441214963615, 'loss': 0.0039901917867631795, 'time_step': 0.00535128059157406, 'init_value': -0.5995718240737915, 'ave_value': -0.45168505275773035, 'soft_opc': nan} step=1494




2022-04-20 15:38.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.28 [info     ] FQE_20220420153818: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016733680862978282, 'time_algorithm_update': 0.005229221769126065, 'loss': 0.004285014980663377, 'time_step': 0.0054728553955813485, 'init_value': -0.600126326084137, 'ave_value': -0.47417989972316, 'soft_opc': nan} step=1660




2022-04-20 15:38.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.29 [info     ] FQE_20220420153818: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016627110630632882, 'time_algorithm_update': 0.005220001002392137, 'loss': 0.004608115069119327, 'time_step': 0.00546159083584705, 'init_value': -0.6168867349624634, 'ave_value': -0.5111236903491337, 'soft_opc': nan} step=1826




2022-04-20 15:38.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.30 [info     ] FQE_20220420153818: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001652441829083914, 'time_algorithm_update': 0.004937905863106969, 'loss': 0.004737885173758021, 'time_step': 0.005179270204291286, 'init_value': -0.6370632648468018, 'ave_value': -0.5465653589104411, 'soft_opc': nan} step=1992




2022-04-20 15:38.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.31 [info     ] FQE_20220420153818: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016608726547425053, 'time_algorithm_update': 0.004788507898169828, 'loss': 0.005279864144584456, 'time_step': 0.005031676177518913, 'init_value': -0.6121261119842529, 'ave_value': -0.5409832475488787, 'soft_opc': nan} step=2158




2022-04-20 15:38.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.32 [info     ] FQE_20220420153818: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016330667288906603, 'time_algorithm_update': 0.0051829556384718565, 'loss': 0.005462883694619165, 'time_step': 0.005419854658195771, 'init_value': -0.6307702660560608, 'ave_value': -0.5801413851999887, 'soft_opc': nan} step=2324




2022-04-20 15:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.33 [info     ] FQE_20220420153818: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001653447208634342, 'time_algorithm_update': 0.005242547356938741, 'loss': 0.005817447745532964, 'time_step': 0.0054864064756646215, 'init_value': -0.6907503604888916, 'ave_value': -0.6478459525031147, 'soft_opc': nan} step=2490




2022-04-20 15:38.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.34 [info     ] FQE_20220420153818: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016615620578627988, 'time_algorithm_update': 0.005215959376599415, 'loss': 0.006572694251179157, 'time_step': 0.005456858370677534, 'init_value': -0.6764888763427734, 'ave_value': -0.646496514329133, 'soft_opc': nan} step=2656




2022-04-20 15:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.35 [info     ] FQE_20220420153818: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001654768564614905, 'time_algorithm_update': 0.00524965682661677, 'loss': 0.006845022030651614, 'time_step': 0.00549212995781956, 'init_value': -0.7336832284927368, 'ave_value': -0.7158598921295289, 'soft_opc': nan} step=2822




2022-04-20 15:38.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.36 [info     ] FQE_20220420153818: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016254689320024238, 'time_algorithm_update': 0.005157605711236058, 'loss': 0.007478610938555191, 'time_step': 0.005399196980947472, 'init_value': -0.7998586893081665, 'ave_value': -0.7807268105602513, 'soft_opc': nan} step=2988




2022-04-20 15:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.37 [info     ] FQE_20220420153818: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016178998602441996, 'time_algorithm_update': 0.005053212843745588, 'loss': 0.007673338419583862, 'time_step': 0.005286690700485046, 'init_value': -0.7770225405693054, 'ave_value': -0.7517446303677217, 'soft_opc': nan} step=3154




2022-04-20 15:38.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.38 [info     ] FQE_20220420153818: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.000161334692713726, 'time_algorithm_update': 0.005244766373232186, 'loss': 0.007868450233174196, 'time_step': 0.005480682993509683, 'init_value': -0.7840864658355713, 'ave_value': -0.7428872913113787, 'soft_opc': nan} step=3320




2022-04-20 15:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.39 [info     ] FQE_20220420153818: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016511779233633754, 'time_algorithm_update': 0.004843336990080684, 'loss': 0.008481097861027614, 'time_step': 0.005082312836704484, 'init_value': -0.8972861766815186, 'ave_value': -0.8538730167611795, 'soft_opc': nan} step=3486




2022-04-20 15:38.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.39 [info     ] FQE_20220420153818: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016516949757035957, 'time_algorithm_update': 0.00473306552473321, 'loss': 0.008797436467293336, 'time_step': 0.004975950861551675, 'init_value': -0.8459295034408569, 'ave_value': -0.8010297114931483, 'soft_opc': nan} step=3652




2022-04-20 15:38.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.40 [info     ] FQE_20220420153818: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016787971358701406, 'time_algorithm_update': 0.005262254232383636, 'loss': 0.00811274029968127, 'time_step': 0.005507101495581937, 'init_value': -0.8650480508804321, 'ave_value': -0.8351837718708289, 'soft_opc': nan} step=3818




2022-04-20 15:38.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.41 [info     ] FQE_20220420153818: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.000165611864572548, 'time_algorithm_update': 0.005292895328567688, 'loss': 0.009795174971138722, 'time_step': 0.005533030234187482, 'init_value': -0.9370871782302856, 'ave_value': -0.8743966595886427, 'soft_opc': nan} step=3984




2022-04-20 15:38.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.42 [info     ] FQE_20220420153818: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016285856086087515, 'time_algorithm_update': 0.005296215953597103, 'loss': 0.010540450349483773, 'time_step': 0.005533512816371688, 'init_value': -0.9814079999923706, 'ave_value': -0.9161002449661929, 'soft_opc': nan} step=4150




2022-04-20 15:38.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.43 [info     ] FQE_20220420153818: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016493538776075984, 'time_algorithm_update': 0.00533510690712067, 'loss': 0.010878419099622462, 'time_step': 0.005572556013084319, 'init_value': -1.0483248233795166, 'ave_value': -0.9743596475835689, 'soft_opc': nan} step=4316




2022-04-20 15:38.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.44 [info     ] FQE_20220420153818: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016007509576268942, 'time_algorithm_update': 0.005204551191215056, 'loss': 0.010928769744596029, 'time_step': 0.005440331367124994, 'init_value': -1.118030309677124, 'ave_value': -1.01589882481709, 'soft_opc': nan} step=4482




2022-04-20 15:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.45 [info     ] FQE_20220420153818: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016378494630376976, 'time_algorithm_update': 0.005298167826181434, 'loss': 0.011903961763453546, 'time_step': 0.005535739013947636, 'init_value': -1.1553289890289307, 'ave_value': -1.0414182053132168, 'soft_opc': nan} step=4648




2022-04-20 15:38.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.46 [info     ] FQE_20220420153818: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.000162137560097568, 'time_algorithm_update': 0.0050811480326824875, 'loss': 0.011508580375075654, 'time_step': 0.00531918887632439, 'init_value': -1.2322688102722168, 'ave_value': -1.0885639299286176, 'soft_opc': nan} step=4814




2022-04-20 15:38.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.47 [info     ] FQE_20220420153818: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016521689403487975, 'time_algorithm_update': 0.004927284746284944, 'loss': 0.012396807486404586, 'time_step': 0.005168417850172663, 'init_value': -1.2748609781265259, 'ave_value': -1.1210493505340875, 'soft_opc': nan} step=4980




2022-04-20 15:38.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.48 [info     ] FQE_20220420153818: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016270057264580783, 'time_algorithm_update': 0.004692975297031632, 'loss': 0.012656530768593705, 'time_step': 0.004928909152387136, 'init_value': -1.3921492099761963, 'ave_value': -1.2033754496952636, 'soft_opc': nan} step=5146




2022-04-20 15:38.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.49 [info     ] FQE_20220420153818: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016689156911459314, 'time_algorithm_update': 0.005189915737473821, 'loss': 0.013245545819272703, 'time_step': 0.0054284951773034525, 'init_value': -1.4568631649017334, 'ave_value': -1.246100202007303, 'soft_opc': nan} step=5312




2022-04-20 15:38.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.50 [info     ] FQE_20220420153818: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001661317894257695, 'time_algorithm_update': 0.005021740155047681, 'loss': 0.013958476120549962, 'time_step': 0.0052623375352606715, 'init_value': -1.5191154479980469, 'ave_value': -1.3022736038706424, 'soft_opc': nan} step=5478




2022-04-20 15:38.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.51 [info     ] FQE_20220420153818: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016563771718955902, 'time_algorithm_update': 0.005055509417890066, 'loss': 0.014299847310442612, 'time_step': 0.005298999418695289, 'init_value': -1.5970741510391235, 'ave_value': -1.3296613974482039, 'soft_opc': nan} step=5644




2022-04-20 15:38.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.52 [info     ] FQE_20220420153818: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001667048557695136, 'time_algorithm_update': 0.005138160234474274, 'loss': 0.015846665029228855, 'time_step': 0.0053798434246017275, 'init_value': -1.717097520828247, 'ave_value': -1.413038270547986, 'soft_opc': nan} step=5810




2022-04-20 15:38.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.53 [info     ] FQE_20220420153818: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001645677060966032, 'time_algorithm_update': 0.005001817841127694, 'loss': 0.015812698828830688, 'time_step': 0.005245304969419916, 'init_value': -1.7723397016525269, 'ave_value': -1.4449356864970009, 'soft_opc': nan} step=5976




2022-04-20 15:38.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.54 [info     ] FQE_20220420153818: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015949772064944348, 'time_algorithm_update': 0.0050927458039249284, 'loss': 0.01701636338418529, 'time_step': 0.0053242933319275635, 'init_value': -1.925922155380249, 'ave_value': -1.5710358167553806, 'soft_opc': nan} step=6142




2022-04-20 15:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.55 [info     ] FQE_20220420153818: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016220506415309677, 'time_algorithm_update': 0.005103296544178423, 'loss': 0.017381852984299367, 'time_step': 0.005338710474680705, 'init_value': -1.9317985773086548, 'ave_value': -1.555213936891088, 'soft_opc': nan} step=6308




2022-04-20 15:38.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.56 [info     ] FQE_20220420153818: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001663285565663533, 'time_algorithm_update': 0.004942372620823872, 'loss': 0.017481721725864673, 'time_step': 0.005182549177882183, 'init_value': -1.97981595993042, 'ave_value': -1.5804799095156963, 'soft_opc': nan} step=6474




2022-04-20 15:38.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.57 [info     ] FQE_20220420153818: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015792789229427474, 'time_algorithm_update': 0.004366969487753259, 'loss': 0.017782808348104494, 'time_step': 0.004597432642097932, 'init_value': -2.018404006958008, 'ave_value': -1.6105977495758887, 'soft_opc': nan} step=6640




2022-04-20 15:38.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.58 [info     ] FQE_20220420153818: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016346609736063393, 'time_algorithm_update': 0.005043915955417128, 'loss': 0.018756489149513597, 'time_step': 0.005279930241136666, 'init_value': -2.0875542163848877, 'ave_value': -1.6466792234056, 'soft_opc': nan} step=6806




2022-04-20 15:38.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:38.59 [info     ] FQE_20220420153818: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001638107989207808, 'time_algorithm_update': 0.005194809063371405, 'loss': 0.019154728670093983, 'time_step': 0.005432755114084266, 'init_value': -2.1852779388427734, 'ave_value': -1.7281078902668916, 'soft_opc': nan} step=6972




2022-04-20 15:38.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.00 [info     ] FQE_20220420153818: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016648798103792122, 'time_algorithm_update': 0.0050446498824889404, 'loss': 0.019616776311500782, 'time_step': 0.005286241152200354, 'init_value': -2.2375552654266357, 'ave_value': -1.7088837320740158, 'soft_opc': nan} step=7138




2022-04-20 15:39.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.01 [info     ] FQE_20220420153818: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016650952488543038, 'time_algorithm_update': 0.005112784454621464, 'loss': 0.020121279693663074, 'time_step': 0.005353383271090956, 'init_value': -2.2977383136749268, 'ave_value': -1.7646317041076316, 'soft_opc': nan} step=7304




2022-04-20 15:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.02 [info     ] FQE_20220420153818: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001664635646774108, 'time_algorithm_update': 0.0049953647406704455, 'loss': 0.021004516898559023, 'time_step': 0.0052380877805043415, 'init_value': -2.3790221214294434, 'ave_value': -1.8067551211772805, 'soft_opc': nan} step=7470




2022-04-20 15:39.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.03 [info     ] FQE_20220420153818: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016898132232298334, 'time_algorithm_update': 0.005062891776303211, 'loss': 0.021674585527269148, 'time_step': 0.00530484785516578, 'init_value': -2.4829564094543457, 'ave_value': -1.88084368762384, 'soft_opc': nan} step=7636




2022-04-20 15:39.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.04 [info     ] FQE_20220420153818: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001657698527876153, 'time_algorithm_update': 0.005106052720403096, 'loss': 0.02155805626499792, 'time_step': 0.005349107535488634, 'init_value': -2.495872974395752, 'ave_value': -1.8975228633534431, 'soft_opc': nan} step=7802




2022-04-20 15:39.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.05 [info     ] FQE_20220420153818: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001678481159440006, 'time_algorithm_update': 0.005130118634327349, 'loss': 0.022607934653495317, 'time_step': 0.005373068602688341, 'init_value': -2.5377326011657715, 'ave_value': -1.8992342073529087, 'soft_opc': nan} step=7968




2022-04-20 15:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.05 [info     ] FQE_20220420153818: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015947043177593187, 'time_algorithm_update': 0.004120737673288368, 'loss': 0.02293061878720119, 'time_step': 0.004349655415638384, 'init_value': -2.6160011291503906, 'ave_value': -1.9343149288440005, 'soft_opc': nan} step=8134




2022-04-20 15:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:39.06 [info     ] FQE_20220420153818: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001674833067928452, 'time_algorithm_update': 0.0051048807350985975, 'loss': 0.023558208087728506, 'time_step': 0.005348538777914392, 'init_value': -2.671328544616699, 'ave_value': -1.9884633370528317, 'soft_opc': nan} step=8300




2022-04-20 15:39.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153818/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:39.07 [info     ] Directory is created at d3rlpy_logs/FQE_20220420153907
2022-04-20 15:39.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:39.07 [debug    ] Building models...
2022-04-20 15:39.07 [debug    ] Models have been built.
2022-04-20 15:39.07 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420153907/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:39.09 [info     ] FQE_20220420153907: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016957937284957532, 'time_algorithm_update': 0.005097404468891232, 'loss': 0.02796391171923037, 'time_step': 0.005344571762306746, 'init_value': -1.4109034538269043, 'ave_value': -1.4217168043392736, 'soft_opc': nan} step=344




2022-04-20 15:39.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.11 [info     ] FQE_20220420153907: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017156088074972463, 'time_algorithm_update': 0.005124136458995731, 'loss': 0.024780807339824563, 'time_step': 0.00537078671677168, 'init_value': -2.1989996433258057, 'ave_value': -2.1953297647202876, 'soft_opc': nan} step=688




2022-04-20 15:39.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.13 [info     ] FQE_20220420153907: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001704720563666765, 'time_algorithm_update': 0.0050599595835042555, 'loss': 0.028506293965958406, 'time_step': 0.005308034808136696, 'init_value': -3.174333333969116, 'ave_value': -3.1639705621712917, 'soft_opc': nan} step=1032




2022-04-20 15:39.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.15 [info     ] FQE_20220420153907: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017295257989750353, 'time_algorithm_update': 0.004619059867637102, 'loss': 0.032975514378766856, 'time_step': 0.004868583623753037, 'init_value': -4.037487030029297, 'ave_value': -4.007187798089004, 'soft_opc': nan} step=1376




2022-04-20 15:39.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.17 [info     ] FQE_20220420153907: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017134533372036245, 'time_algorithm_update': 0.005074988963992097, 'loss': 0.04339723297405641, 'time_step': 0.005321501299392345, 'init_value': -4.861631870269775, 'ave_value': -4.8126379579439895, 'soft_opc': nan} step=1720




2022-04-20 15:39.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.18 [info     ] FQE_20220420153907: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017002086306727208, 'time_algorithm_update': 0.005019865756811097, 'loss': 0.05356207275546567, 'time_step': 0.0052672624588012695, 'init_value': -5.519599437713623, 'ave_value': -5.4734684103369915, 'soft_opc': nan} step=2064




2022-04-20 15:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.20 [info     ] FQE_20220420153907: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016996472380882086, 'time_algorithm_update': 0.005066405895144441, 'loss': 0.07141874199949724, 'time_step': 0.005312109409376632, 'init_value': -6.354122161865234, 'ave_value': -6.285069700218905, 'soft_opc': nan} step=2408




2022-04-20 15:39.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.22 [info     ] FQE_20220420153907: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017101889432862748, 'time_algorithm_update': 0.004824377769647643, 'loss': 0.08916639890196885, 'time_step': 0.005071393279142158, 'init_value': -7.207233428955078, 'ave_value': -7.106674133158113, 'soft_opc': nan} step=2752




2022-04-20 15:39.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.24 [info     ] FQE_20220420153907: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016948164895523425, 'time_algorithm_update': 0.004982995432476664, 'loss': 0.10871174845432993, 'time_step': 0.00522915707078091, 'init_value': -7.814655303955078, 'ave_value': -7.697144350861325, 'soft_opc': nan} step=3096




2022-04-20 15:39.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.26 [info     ] FQE_20220420153907: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001733989216560541, 'time_algorithm_update': 0.005126495694005212, 'loss': 0.13849499893677963, 'time_step': 0.005375305580538373, 'init_value': -8.612882614135742, 'ave_value': -8.517008193434627, 'soft_opc': nan} step=3440




2022-04-20 15:39.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.28 [info     ] FQE_20220420153907: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001697082852208337, 'time_algorithm_update': 0.005105825357658919, 'loss': 0.15607007132669867, 'time_step': 0.005351918381313945, 'init_value': -9.168359756469727, 'ave_value': -9.066611546444967, 'soft_opc': nan} step=3784




2022-04-20 15:39.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.30 [info     ] FQE_20220420153907: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017245148503503135, 'time_algorithm_update': 0.005143446284671163, 'loss': 0.18244325517416868, 'time_step': 0.0053925541944282, 'init_value': -9.865251541137695, 'ave_value': -9.716548474809272, 'soft_opc': nan} step=4128




2022-04-20 15:39.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.32 [info     ] FQE_20220420153907: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017085255578506823, 'time_algorithm_update': 0.004597654176312823, 'loss': 0.2111292169591802, 'time_step': 0.00484234094619751, 'init_value': -10.579753875732422, 'ave_value': -10.430638796883123, 'soft_opc': nan} step=4472




2022-04-20 15:39.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.34 [info     ] FQE_20220420153907: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017451061758884165, 'time_algorithm_update': 0.005062808131062707, 'loss': 0.24786326869087683, 'time_step': 0.005311803762302842, 'init_value': -11.156806945800781, 'ave_value': -11.006172109197255, 'soft_opc': nan} step=4816




2022-04-20 15:39.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.36 [info     ] FQE_20220420153907: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017043601634890535, 'time_algorithm_update': 0.005050754824350047, 'loss': 0.27797096811754757, 'time_step': 0.005298480738041012, 'init_value': -11.538046836853027, 'ave_value': -11.273771768209297, 'soft_opc': nan} step=5160




2022-04-20 15:39.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.38 [info     ] FQE_20220420153907: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017413150432497957, 'time_algorithm_update': 0.005136651355166768, 'loss': 0.298505426021726, 'time_step': 0.005386511253756146, 'init_value': -11.809619903564453, 'ave_value': -11.415526137500162, 'soft_opc': nan} step=5504




2022-04-20 15:39.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.40 [info     ] FQE_20220420153907: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017080611960832462, 'time_algorithm_update': 0.004826481952223667, 'loss': 0.3355012063023656, 'time_step': 0.005072619332823642, 'init_value': -12.405092239379883, 'ave_value': -11.781251372088184, 'soft_opc': nan} step=5848




2022-04-20 15:39.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.42 [info     ] FQE_20220420153907: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017010056695272756, 'time_algorithm_update': 0.005129198002260785, 'loss': 0.35759126628359217, 'time_step': 0.00537572419920633, 'init_value': -13.197107315063477, 'ave_value': -12.398987427658426, 'soft_opc': nan} step=6192




2022-04-20 15:39.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.44 [info     ] FQE_20220420153907: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001679866812949957, 'time_algorithm_update': 0.0050697590029516884, 'loss': 0.3968675182157651, 'time_step': 0.005312968825185021, 'init_value': -14.199302673339844, 'ave_value': -13.068214558845654, 'soft_opc': nan} step=6536




2022-04-20 15:39.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.46 [info     ] FQE_20220420153907: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017510735711386038, 'time_algorithm_update': 0.005097220803416053, 'loss': 0.42495880507712447, 'time_step': 0.00535099797470625, 'init_value': -14.538505554199219, 'ave_value': -13.298453431269406, 'soft_opc': nan} step=6880




2022-04-20 15:39.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.48 [info     ] FQE_20220420153907: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016938323198362838, 'time_algorithm_update': 0.005156028409336888, 'loss': 0.4614564020533201, 'time_step': 0.005402102719905765, 'init_value': -15.232666015625, 'ave_value': -13.861409811397829, 'soft_opc': nan} step=7224




2022-04-20 15:39.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.50 [info     ] FQE_20220420153907: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016829371452331543, 'time_algorithm_update': 0.004620978998583417, 'loss': 0.4947328154040977, 'time_step': 0.004864432090936705, 'init_value': -15.961685180664062, 'ave_value': -14.364445109423754, 'soft_opc': nan} step=7568




2022-04-20 15:39.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.52 [info     ] FQE_20220420153907: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016873728397280672, 'time_algorithm_update': 0.005076505417047545, 'loss': 0.5231438898914602, 'time_step': 0.005319031865097756, 'init_value': -16.699298858642578, 'ave_value': -14.884924073253448, 'soft_opc': nan} step=7912




2022-04-20 15:39.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.54 [info     ] FQE_20220420153907: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001720113809718642, 'time_algorithm_update': 0.005055817060692366, 'loss': 0.5619166462768822, 'time_step': 0.005306154489517212, 'init_value': -17.62570571899414, 'ave_value': -15.499764538768611, 'soft_opc': nan} step=8256




2022-04-20 15:39.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.56 [info     ] FQE_20220420153907: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016792430434116098, 'time_algorithm_update': 0.005090400923130124, 'loss': 0.601218348325685, 'time_step': 0.0053344708542491116, 'init_value': -18.51207160949707, 'ave_value': -16.17122921887198, 'soft_opc': nan} step=8600




2022-04-20 15:39.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.58 [info     ] FQE_20220420153907: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017140424528787302, 'time_algorithm_update': 0.0047226305617842565, 'loss': 0.6517743020491717, 'time_step': 0.004972464816514836, 'init_value': -19.673847198486328, 'ave_value': -17.23730863748823, 'soft_opc': nan} step=8944




2022-04-20 15:39.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:39.59 [info     ] FQE_20220420153907: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001679062843322754, 'time_algorithm_update': 0.005089646855065989, 'loss': 0.687947272420536, 'time_step': 0.005333400049874949, 'init_value': -20.186901092529297, 'ave_value': -17.51510563492775, 'soft_opc': nan} step=9288




2022-04-20 15:40.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.01 [info     ] FQE_20220420153907: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016991135685942894, 'time_algorithm_update': 0.005043200975240663, 'loss': 0.7103618449737253, 'time_step': 0.005290199157803557, 'init_value': -20.620756149291992, 'ave_value': -17.644242432950115, 'soft_opc': nan} step=9632




2022-04-20 15:40.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.03 [info     ] FQE_20220420153907: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017140424528787302, 'time_algorithm_update': 0.005079322776129079, 'loss': 0.7483085061011966, 'time_step': 0.00532645957414494, 'init_value': -21.201005935668945, 'ave_value': -17.970064902533398, 'soft_opc': nan} step=9976




2022-04-20 15:40.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.05 [info     ] FQE_20220420153907: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017278970674026843, 'time_algorithm_update': 0.005130121874254804, 'loss': 0.771161190537346, 'time_step': 0.005381600801334824, 'init_value': -21.585098266601562, 'ave_value': -18.056200381800497, 'soft_opc': nan} step=10320




2022-04-20 15:40.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.07 [info     ] FQE_20220420153907: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001619867114133613, 'time_algorithm_update': 0.0045629005099451815, 'loss': 0.8059411833474283, 'time_step': 0.004798483709956325, 'init_value': -22.320589065551758, 'ave_value': -18.502300195374133, 'soft_opc': nan} step=10664




2022-04-20 15:40.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.09 [info     ] FQE_20220420153907: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017189425091410792, 'time_algorithm_update': 0.005086958408355713, 'loss': 0.8359206524720892, 'time_step': 0.005338206540706546, 'init_value': -23.017532348632812, 'ave_value': -19.03145108813722, 'soft_opc': nan} step=11008




2022-04-20 15:40.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.11 [info     ] FQE_20220420153907: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001681065836618113, 'time_algorithm_update': 0.005128073137859965, 'loss': 0.8777461007973829, 'time_step': 0.005375401225200919, 'init_value': -23.588701248168945, 'ave_value': -19.526486442744865, 'soft_opc': nan} step=11352




2022-04-20 15:40.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.13 [info     ] FQE_20220420153907: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017198227172674134, 'time_algorithm_update': 0.005057863717855409, 'loss': 0.8841285699501981, 'time_step': 0.005308855411618255, 'init_value': -23.69650650024414, 'ave_value': -19.2184469063674, 'soft_opc': nan} step=11696




2022-04-20 15:40.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.15 [info     ] FQE_20220420153907: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001692522403805755, 'time_algorithm_update': 0.0046333705270013145, 'loss': 0.8975024586435147, 'time_step': 0.004876403614532116, 'init_value': -24.244766235351562, 'ave_value': -19.627955179261715, 'soft_opc': nan} step=12040




2022-04-20 15:40.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.17 [info     ] FQE_20220420153907: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001727037651594295, 'time_algorithm_update': 0.005148554957190225, 'loss': 0.9026418299262606, 'time_step': 0.005398798820584319, 'init_value': -24.192138671875, 'ave_value': -19.490080041617894, 'soft_opc': nan} step=12384




2022-04-20 15:40.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.19 [info     ] FQE_20220420153907: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016994393149087595, 'time_algorithm_update': 0.005070966343547023, 'loss': 0.923522227162192, 'time_step': 0.005318111458490061, 'init_value': -24.706022262573242, 'ave_value': -19.8615260609283, 'soft_opc': nan} step=12728




2022-04-20 15:40.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.21 [info     ] FQE_20220420153907: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017408506814823595, 'time_algorithm_update': 0.005136801059855972, 'loss': 0.9187972660360555, 'time_step': 0.005388329195421796, 'init_value': -25.145957946777344, 'ave_value': -20.119120910775486, 'soft_opc': nan} step=13072




2022-04-20 15:40.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.23 [info     ] FQE_20220420153907: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016951422358668126, 'time_algorithm_update': 0.005071218623671421, 'loss': 0.9393188817166659, 'time_step': 0.0053183713624643724, 'init_value': -25.886343002319336, 'ave_value': -20.68626900334079, 'soft_opc': nan} step=13416




2022-04-20 15:40.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.25 [info     ] FQE_20220420153907: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001725907935652622, 'time_algorithm_update': 0.004573740238367125, 'loss': 0.9453873876698835, 'time_step': 0.0048249613407046294, 'init_value': -26.459718704223633, 'ave_value': -21.270515920691242, 'soft_opc': nan} step=13760




2022-04-20 15:40.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.27 [info     ] FQE_20220420153907: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017182217087856558, 'time_algorithm_update': 0.005038756270741307, 'loss': 0.9582412432619306, 'time_step': 0.005287162786306337, 'init_value': -27.003713607788086, 'ave_value': -21.661279956775356, 'soft_opc': nan} step=14104




2022-04-20 15:40.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.29 [info     ] FQE_20220420153907: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017514547636342603, 'time_algorithm_update': 0.0051130818766216895, 'loss': 0.9849036953866829, 'time_step': 0.005368225796278133, 'init_value': -27.198028564453125, 'ave_value': -21.846390688432347, 'soft_opc': nan} step=14448




2022-04-20 15:40.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.31 [info     ] FQE_20220420153907: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017328040544376818, 'time_algorithm_update': 0.005140890215718469, 'loss': 0.9598346798366673, 'time_step': 0.005390963582105415, 'init_value': -27.069095611572266, 'ave_value': -21.732847922671215, 'soft_opc': nan} step=14792




2022-04-20 15:40.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.32 [info     ] FQE_20220420153907: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016958283823589946, 'time_algorithm_update': 0.004254709149515906, 'loss': 0.9475008407824261, 'time_step': 0.004501124454099078, 'init_value': -27.247272491455078, 'ave_value': -22.102057991824154, 'soft_opc': nan} step=15136




2022-04-20 15:40.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.34 [info     ] FQE_20220420153907: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016386356464652128, 'time_algorithm_update': 0.0035410495691521222, 'loss': 0.9605369293888988, 'time_step': 0.003779769636863886, 'init_value': -27.41591453552246, 'ave_value': -22.133862735829023, 'soft_opc': nan} step=15480




2022-04-20 15:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.35 [info     ] FQE_20220420153907: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017274257748626, 'time_algorithm_update': 0.0034807109555532764, 'loss': 0.9533321024351862, 'time_step': 0.003728321818418281, 'init_value': -27.703582763671875, 'ave_value': -22.51156800815265, 'soft_opc': nan} step=15824




2022-04-20 15:40.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.37 [info     ] FQE_20220420153907: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016678557839504507, 'time_algorithm_update': 0.0035004692022190535, 'loss': 0.9533475627039754, 'time_step': 0.003739081149877504, 'init_value': -28.172584533691406, 'ave_value': -22.95051316104762, 'soft_opc': nan} step=16168




2022-04-20 15:40.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.38 [info     ] FQE_20220420153907: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016690755999365517, 'time_algorithm_update': 0.0035913288593292236, 'loss': 0.9427175055106365, 'time_step': 0.003831632608591124, 'init_value': -28.47315216064453, 'ave_value': -23.454235087383772, 'soft_opc': nan} step=16512




2022-04-20 15:40.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.40 [info     ] FQE_20220420153907: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016458505807920943, 'time_algorithm_update': 0.003500548906104509, 'loss': 0.9641237816134425, 'time_step': 0.0037352186302806057, 'init_value': -29.027313232421875, 'ave_value': -23.80044549177904, 'soft_opc': nan} step=16856




2022-04-20 15:40.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:40.41 [info     ] FQE_20220420153907: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016909283260966456, 'time_algorithm_update': 0.0034662693045860115, 'loss': 0.9535809359675663, 'time_step': 0.003709656554599141, 'init_value': -28.850215911865234, 'ave_value': -23.64811595530912, 'soft_opc': nan} step=17200




2022-04-20 15:40.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153907/model_17200.pt
search iteration:  3
using hyper params:  [0.0025487161562910317, 0.008025943666813981, 9.814315051166158e-05, 7]
2022-04-20 15:40.41 [debug    ] RoundIterator is selected.
2022-04-20 15:40.41 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420154041
2022-04-20 15:40.41 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:40.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:40.41 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:40.41 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00254871615629

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.44 [info     ] TD3PlusBC_20220420154041: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0004016795353582728, 'time_algorithm_update': 0.006813114846658985, 'critic_loss': 9.6319730090119, 'actor_loss': 2.6555018522586042, 'time_step': 0.007296182955914771, 'td_error': 1.0447589223750342, 'init_value': -11.450523376464844, 'ave_value': -7.280582770120965} step=342
2022-04-20 15:40.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.47 [info     ] TD3PlusBC_20220420154041: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00039584734286481174, 'time_algorithm_update': 0.0067623986138237845, 'critic_loss': 6.609740659507395, 'actor_loss': 2.582079151220489, 'time_step': 0.007234712790327462, 'td_error': 1.3120166003975628, 'init_value': -16.167272567749023, 'ave_value': -10.300028728170002} step=684
2022-04-20 15:40.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.50 [info     ] TD3PlusBC_20220420154041: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00040296574085079437, 'time_algorithm_update': 0.006813968134205243, 'critic_loss': 10.087838030698007, 'actor_loss': 2.5744218909949588, 'time_step': 0.00729687241782919, 'td_error': 1.658563636254907, 'init_value': -20.754011154174805, 'ave_value': -13.295330784889693} step=1026
2022-04-20 15:40.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.53 [info     ] TD3PlusBC_20220420154041: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003916352813006842, 'time_algorithm_update': 0.006703339124980725, 'critic_loss': 14.676553339986077, 'actor_loss': 2.5703846167402657, 'time_step': 0.007163841821994, 'td_error': 2.078887216571065, 'init_value': -25.84246253967285, 'ave_value': -16.624086560037675} step=1368
2022-04-20 15:40.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.56 [info     ] TD3PlusBC_20220420154041: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003959309985066018, 'time_algorithm_update': 0.006765665366635685, 'critic_loss': 19.559020987728186, 'actor_loss': 2.5676560290375647, 'time_step': 0.007235819833320484, 'td_error': 2.6790370000930244, 'init_value': -31.288105010986328, 'ave_value': -20.02412740965164} step=1710
2022-04-20 15:40.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.59 [info     ] TD3PlusBC_20220420154041: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0004008645899811683, 'time_algorithm_update': 0.006786721491674233, 'critic_loss': 24.68310000882511, 'actor_loss': 2.5651963119618375, 'time_step': 0.007260201270120186, 'td_error': 3.2234855060154937, 'init_value': -35.76451110839844, 'ave_value': -22.926853311656313} step=2052
2022-04-20 15:40.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.01 [info     ] TD3PlusBC_20220420154041: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0004027489333124886, 'time_algorithm_update': 0.006736578997115643, 'critic_loss': 29.42065053237112, 'actor_loss': 2.5653821139307746, 'time_step': 0.007210309742486965, 'td_error': 3.514468984730819, 'init_value': -39.62303161621094, 'ave_value': -25.764319135603152} step=2394
2022-04-20 15:41.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.04 [info     ] TD3PlusBC_20220420154041: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0004022435138100072, 'time_algorithm_update': 0.006748121384291621, 'critic_loss': 35.03341691535816, 'actor_loss': 2.563015375918115, 'time_step': 0.007223005183258949, 'td_error': 4.1235380641528465, 'init_value': -43.856285095214844, 'ave_value': -28.527845438776353} step=2736
2022-04-20 15:41.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.07 [info     ] TD3PlusBC_20220420154041: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00039845600462796397, 'time_algorithm_update': 0.006798502994559662, 'critic_loss': 40.59955318071689, 'actor_loss': 2.5629057438052887, 'time_step': 0.0072761843776145176, 'td_error': 4.728179046237754, 'init_value': -48.73810577392578, 'ave_value': -31.432060626777947} step=3078
2022-04-20 15:41.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.10 [info     ] TD3PlusBC_20220420154041: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0004002734234458522, 'time_algorithm_update': 0.0068165468193634214, 'critic_loss': 46.39103011081093, 'actor_loss': 2.5633523575743737, 'time_step': 0.007291270278350651, 'td_error': 5.084650952893851, 'init_value': -50.63861846923828, 'ave_value': -33.51101679253845} step=3420
2022-04-20 15:41.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.13 [info     ] TD3PlusBC_20220420154041: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003981680897941366, 'time_algorithm_update': 0.006488610429373401, 'critic_loss': 52.28675355409321, 'actor_loss': 2.5620158011453196, 'time_step': 0.006961212520710906, 'td_error': 6.021441800005806, 'init_value': -55.525535583496094, 'ave_value': -36.34614491356881} step=3762
2022-04-20 15:41.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.16 [info     ] TD3PlusBC_20220420154041: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0004055500030517578, 'time_algorithm_update': 0.006741026688737479, 'critic_loss': 58.11460766597101, 'actor_loss': 2.5615143608628657, 'time_step': 0.007221775445324635, 'td_error': 6.303752357504336, 'init_value': -57.1558723449707, 'ave_value': -38.05637436415614} step=4104
2022-04-20 15:41.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.19 [info     ] TD3PlusBC_20220420154041: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039850619801303796, 'time_algorithm_update': 0.006790804584123935, 'critic_loss': 63.922329138594066, 'actor_loss': 2.561371658280579, 'time_step': 0.007265503643548977, 'td_error': 6.962273566674046, 'init_value': -60.972679138183594, 'ave_value': -40.464730948997946} step=4446
2022-04-20 15:41.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.22 [info     ] TD3PlusBC_20220420154041: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004012947194060387, 'time_algorithm_update': 0.006746115740279705, 'critic_loss': 70.17355887652838, 'actor_loss': 2.5610689614948474, 'time_step': 0.007219108921742579, 'td_error': 7.649128055552809, 'init_value': -64.12857055664062, 'ave_value': -42.49884585582106} step=4788
2022-04-20 15:41.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.24 [info     ] TD3PlusBC_20220420154041: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003971691020050941, 'time_algorithm_update': 0.0067364367825246, 'critic_loss': 75.79073879175019, 'actor_loss': 2.5609774185202974, 'time_step': 0.007207527495267098, 'td_error': 8.269864630647756, 'init_value': -67.48600769042969, 'ave_value': -44.532876114831346} step=5130
2022-04-20 15:41.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.27 [info     ] TD3PlusBC_20220420154041: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00039331048552752933, 'time_algorithm_update': 0.006776461127208687, 'critic_loss': 81.18953035588851, 'actor_loss': 2.561545621581942, 'time_step': 0.007247418688054671, 'td_error': 8.344140218492146, 'init_value': -67.48835754394531, 'ave_value': -46.017983450611446} step=5472
2022-04-20 15:41.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.30 [info     ] TD3PlusBC_20220420154041: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0004028207377383583, 'time_algorithm_update': 0.006725860618011296, 'critic_loss': 86.23836267482467, 'actor_loss': 2.5621921444496913, 'time_step': 0.007203940759625351, 'td_error': 9.580002453945726, 'init_value': -72.42317962646484, 'ave_value': -48.19995610679894} step=5814
2022-04-20 15:41.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.33 [info     ] TD3PlusBC_20220420154041: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.000400465134291621, 'time_algorithm_update': 0.006706596815098099, 'critic_loss': 91.62109701814707, 'actor_loss': 2.562456210454305, 'time_step': 0.007181510590670402, 'td_error': 10.22810161314304, 'init_value': -73.62391662597656, 'ave_value': -49.498872050726284} step=6156
2022-04-20 15:41.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.36 [info     ] TD3PlusBC_20220420154041: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00040216125242891366, 'time_algorithm_update': 0.006805792886611314, 'critic_loss': 96.67251644915308, 'actor_loss': 2.5646797146713523, 'time_step': 0.007287802751998455, 'td_error': 10.643422576665143, 'init_value': -73.64519500732422, 'ave_value': -50.33949485277891} step=6498
2022-04-20 15:41.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.39 [info     ] TD3PlusBC_20220420154041: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00039611364665784334, 'time_algorithm_update': 0.006833979260851765, 'critic_loss': 101.84728662591232, 'actor_loss': 2.565935752545184, 'time_step': 0.007291882358796415, 'td_error': 11.673271182832547, 'init_value': -75.88871765136719, 'ave_value': -51.95850263275424} step=6840
2022-04-20 15:41.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.42 [info     ] TD3PlusBC_20220420154041: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003917419422439664, 'time_algorithm_update': 0.006790672129357768, 'critic_loss': 106.55018615722656, 'actor_loss': 2.565771861383092, 'time_step': 0.007244478192245751, 'td_error': 11.448751220965265, 'init_value': -77.42548370361328, 'ave_value': -53.474325302507026} step=7182
2022-04-20 15:41.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.45 [info     ] TD3PlusBC_20220420154041: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003934666427255374, 'time_algorithm_update': 0.006831705221655773, 'critic_loss': 111.97848084656118, 'actor_loss': 2.566120581320155, 'time_step': 0.007285758765817386, 'td_error': 13.244566835137116, 'init_value': -79.37977600097656, 'ave_value': -54.975937359156845} step=7524
2022-04-20 15:41.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.48 [info     ] TD3PlusBC_20220420154041: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039317872789171006, 'time_algorithm_update': 0.006756535747595001, 'critic_loss': 115.58734329402098, 'actor_loss': 2.5664201493848835, 'time_step': 0.0072070290470681, 'td_error': 11.88196780523161, 'init_value': -78.59910583496094, 'ave_value': -55.47872527145504} step=7866
2022-04-20 15:41.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.51 [info     ] TD3PlusBC_20220420154041: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003954681039553637, 'time_algorithm_update': 0.006723451335527744, 'critic_loss': 120.60431291903669, 'actor_loss': 2.5656821044564944, 'time_step': 0.00717766661393015, 'td_error': 12.34156044095955, 'init_value': -81.06694793701172, 'ave_value': -57.17162230815202} step=8208
2022-04-20 15:41.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.53 [info     ] TD3PlusBC_20220420154041: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0004005948005363955, 'time_algorithm_update': 0.00680754756369786, 'critic_loss': 124.24681524087114, 'actor_loss': 2.5650352623030455, 'time_step': 0.007274185704906085, 'td_error': 11.898611261766039, 'init_value': -81.84587097167969, 'ave_value': -57.454595082468344} step=8550
2022-04-20 15:41.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.56 [info     ] TD3PlusBC_20220420154041: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00039205355950963427, 'time_algorithm_update': 0.006775737505907204, 'critic_loss': 127.67845910055595, 'actor_loss': 2.5655244944388405, 'time_step': 0.007233532548647875, 'td_error': 12.571421748527031, 'init_value': -84.06593322753906, 'ave_value': -59.066765083136325} step=8892
2022-04-20 15:41.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:41.59 [info     ] TD3PlusBC_20220420154041: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00040218844051249545, 'time_algorithm_update': 0.006826227171379223, 'critic_loss': 131.37860968517282, 'actor_loss': 2.5654856670669646, 'time_step': 0.007292714732432226, 'td_error': 12.96387762608245, 'init_value': -86.04463195800781, 'ave_value': -59.979932521997476} step=9234
2022-04-20 15:41.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.02 [info     ] TD3PlusBC_20220420154041: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00039579436095834474, 'time_algorithm_update': 0.006737050954361408, 'critic_loss': 134.72092881676747, 'actor_loss': 2.5658812202208225, 'time_step': 0.007197237154196578, 'td_error': 13.346288866960757, 'init_value': -84.58419036865234, 'ave_value': -60.273800719078665} step=9576
2022-04-20 15:42.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.05 [info     ] TD3PlusBC_20220420154041: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00039633533410858687, 'time_algorithm_update': 0.006776542691459433, 'critic_loss': 138.63973983407718, 'actor_loss': 2.5653271479913364, 'time_step': 0.007235319990860789, 'td_error': 14.642570458464224, 'init_value': -85.53627014160156, 'ave_value': -61.449335045384764} step=9918
2022-04-20 15:42.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.08 [info     ] TD3PlusBC_20220420154041: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003981374160588136, 'time_algorithm_update': 0.006762272433230751, 'critic_loss': 141.0430340014006, 'actor_loss': 2.566673577180383, 'time_step': 0.007223633297702722, 'td_error': 14.038395874252682, 'init_value': -87.74928283691406, 'ave_value': -62.43568074152427} step=10260
2022-04-20 15:42.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.11 [info     ] TD3PlusBC_20220420154041: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003984859812329387, 'time_algorithm_update': 0.006788620474742867, 'critic_loss': 144.21463262546828, 'actor_loss': 2.5662040138802333, 'time_step': 0.007245659828186035, 'td_error': 14.799403686237245, 'init_value': -90.16583251953125, 'ave_value': -63.24387951466448} step=10602
2022-04-20 15:42.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.14 [info     ] TD3PlusBC_20220420154041: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0004001570026776944, 'time_algorithm_update': 0.006792272740637349, 'critic_loss': 147.7242022062603, 'actor_loss': 2.5656115827504653, 'time_step': 0.007258980594880399, 'td_error': 15.208613565925067, 'init_value': -86.72418212890625, 'ave_value': -63.70992200906716} step=10944
2022-04-20 15:42.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.17 [info     ] TD3PlusBC_20220420154041: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00039382845337627927, 'time_algorithm_update': 0.006856769846196761, 'critic_loss': 149.74442840040777, 'actor_loss': 2.566495447828059, 'time_step': 0.007311061808937474, 'td_error': 14.414509618667994, 'init_value': -86.29212951660156, 'ave_value': -63.988345649994} step=11286
2022-04-20 15:42.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.19 [info     ] TD3PlusBC_20220420154041: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00038952576486687914, 'time_algorithm_update': 0.006721622065493935, 'critic_loss': 151.72538839864453, 'actor_loss': 2.5660891853577907, 'time_step': 0.007171068972314311, 'td_error': 14.545065586093832, 'init_value': -89.1018295288086, 'ave_value': -65.02476098359236} step=11628
2022-04-20 15:42.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.22 [info     ] TD3PlusBC_20220420154041: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00039114450153551603, 'time_algorithm_update': 0.006794198214659217, 'critic_loss': 154.22945526747677, 'actor_loss': 2.566098355410392, 'time_step': 0.007247717059843722, 'td_error': 14.609137963192582, 'init_value': -88.6984634399414, 'ave_value': -65.36518211005071} step=11970
2022-04-20 15:42.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.25 [info     ] TD3PlusBC_20220420154041: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003951488182558651, 'time_algorithm_update': 0.006801847128840218, 'critic_loss': 156.17178721734655, 'actor_loss': 2.5665354073396203, 'time_step': 0.007253599445722256, 'td_error': 15.628786112826269, 'init_value': -90.58860778808594, 'ave_value': -66.38773469191716} step=12312
2022-04-20 15:42.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.28 [info     ] TD3PlusBC_20220420154041: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0004030751903154697, 'time_algorithm_update': 0.0068241030152081045, 'critic_loss': 157.9710133647361, 'actor_loss': 2.5663346087026317, 'time_step': 0.007289619473685996, 'td_error': 15.163265540805963, 'init_value': -88.63920593261719, 'ave_value': -66.4710948013709} step=12654
2022-04-20 15:42.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.31 [info     ] TD3PlusBC_20220420154041: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00039910712437322965, 'time_algorithm_update': 0.006781219738965843, 'critic_loss': 159.78556944194594, 'actor_loss': 2.5660074105736803, 'time_step': 0.007243014915644774, 'td_error': 14.682421895136862, 'init_value': -86.23047637939453, 'ave_value': -66.30951684754912} step=12996
2022-04-20 15:42.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.34 [info     ] TD3PlusBC_20220420154041: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00039996389757122907, 'time_algorithm_update': 0.006808814946670978, 'critic_loss': 161.00327526337918, 'actor_loss': 2.5665086793620686, 'time_step': 0.007267626405459399, 'td_error': 16.30826608753022, 'init_value': -89.54013061523438, 'ave_value': -67.47610637965435} step=13338
2022-04-20 15:42.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.37 [info     ] TD3PlusBC_20220420154041: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00039858357948169374, 'time_algorithm_update': 0.006775679644088299, 'critic_loss': 162.699365783156, 'actor_loss': 2.5663267813230815, 'time_step': 0.007236669635215001, 'td_error': 16.62986190872405, 'init_value': -87.31075286865234, 'ave_value': -67.24236568748448} step=13680
2022-04-20 15:42.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.40 [info     ] TD3PlusBC_20220420154041: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00039524781076531664, 'time_algorithm_update': 0.008272318812141641, 'critic_loss': 163.75984174047994, 'actor_loss': 2.567078821840342, 'time_step': 0.008730611605950964, 'td_error': 15.654242054042626, 'init_value': -87.99081420898438, 'ave_value': -67.75355161382859} step=14022
2022-04-20 15:42.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.44 [info     ] TD3PlusBC_20220420154041: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003991803230597959, 'time_algorithm_update': 0.008989838828817445, 'critic_loss': 165.32503826297514, 'actor_loss': 2.5659813072249205, 'time_step': 0.009454439258017736, 'td_error': 17.048298100479737, 'init_value': -90.90361022949219, 'ave_value': -68.97869948033411} step=14364
2022-04-20 15:42.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.48 [info     ] TD3PlusBC_20220420154041: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00039947242067571274, 'time_algorithm_update': 0.008486475163733053, 'critic_loss': 166.40116474084687, 'actor_loss': 2.5664883189731174, 'time_step': 0.0089486823444478, 'td_error': 15.878213929953615, 'init_value': -88.92291259765625, 'ave_value': -68.81879886681851} step=14706
2022-04-20 15:42.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.52 [info     ] TD3PlusBC_20220420154041: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00040199045549359235, 'time_algorithm_update': 0.008999524060745685, 'critic_loss': 167.03432473522878, 'actor_loss': 2.5662301060749075, 'time_step': 0.009466418745922066, 'td_error': 16.321365587674308, 'init_value': -89.0724868774414, 'ave_value': -68.95310107444472} step=15048
2022-04-20 15:42.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.55 [info     ] TD3PlusBC_20220420154041: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00040193329080503586, 'time_algorithm_update': 0.008972134506493284, 'critic_loss': 167.99473627547772, 'actor_loss': 2.566843373036524, 'time_step': 0.00943784337294729, 'td_error': 16.231531340041748, 'init_value': -87.41654968261719, 'ave_value': -68.92779258447624} step=15390
2022-04-20 15:42.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.59 [info     ] TD3PlusBC_20220420154041: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00039797428755732305, 'time_algorithm_update': 0.00846678750556812, 'critic_loss': 168.73791260747186, 'actor_loss': 2.567425117158053, 'time_step': 0.008927822810167458, 'td_error': 16.996234700437448, 'init_value': -85.93700408935547, 'ave_value': -69.02244195703216} step=15732
2022-04-20 15:42.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.03 [info     ] TD3PlusBC_20220420154041: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004064388442457768, 'time_algorithm_update': 0.009053960181119149, 'critic_loss': 168.8977023119118, 'actor_loss': 2.5662857942413866, 'time_step': 0.009521055639835825, 'td_error': 15.651994467541316, 'init_value': -86.32752990722656, 'ave_value': -69.26211562505327} step=16074
2022-04-20 15:43.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.06 [info     ] TD3PlusBC_20220420154041: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003978536840070758, 'time_algorithm_update': 0.00847817722119783, 'critic_loss': 169.17244990387854, 'actor_loss': 2.566672525907818, 'time_step': 0.008936842282613119, 'td_error': 16.76224736173182, 'init_value': -89.2844009399414, 'ave_value': -70.17514329091352} step=16416
2022-04-20 15:43.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.10 [info     ] TD3PlusBC_20220420154041: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0004025586167274163, 'time_algorithm_update': 0.008772765683848955, 'critic_loss': 169.5544605812831, 'actor_loss': 2.566139134747243, 'time_step': 0.009239895999083045, 'td_error': 17.452618275510567, 'init_value': -91.17674255371094, 'ave_value': -70.76778079107723} step=16758
2022-04-20 15:43.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.14 [info     ] TD3PlusBC_20220420154041: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00040498811599106816, 'time_algorithm_update': 0.008874920376560144, 'critic_loss': 170.3039503487927, 'actor_loss': 2.567516321327254, 'time_step': 0.00933785326996742, 'td_error': 16.53669448199511, 'init_value': -87.69554138183594, 'ave_value': -70.35592383718537} step=17100
2022-04-20 15:43.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154041/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:43.15 [info     ] FQE_20220420154314: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014088240014501364, 'time_algorithm_update': 0.003958045718181564, 'loss': 0.00544986547761967, 'time_step': 0.0041650074074067265, 'init_value': -0.48423677682876587, 'ave_value': -0.4588857270508736, 'soft_opc': nan} step=166




2022-04-20 15:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.16 [info     ] FQE_20220420154314: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014558326767151616, 'time_algorithm_update': 0.004956038601427193, 'loss': 0.004173675068115524, 'time_step': 0.005168305822165616, 'init_value': -0.6119048595428467, 'ave_value': -0.536726001193663, 'soft_opc': nan} step=332




2022-04-20 15:43.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.17 [info     ] FQE_20220420154314: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014818576445062477, 'time_algorithm_update': 0.0050179857805550815, 'loss': 0.0037566528035641975, 'time_step': 0.00523036072053105, 'init_value': -0.6820859909057617, 'ave_value': -0.570461146211302, 'soft_opc': nan} step=498




2022-04-20 15:43.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.18 [info     ] FQE_20220420154314: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00013634957462908273, 'time_algorithm_update': 0.004859487694430064, 'loss': 0.0036881335503253414, 'time_step': 0.005057847643473062, 'init_value': -0.7733572721481323, 'ave_value': -0.6166297445291872, 'soft_opc': nan} step=664




2022-04-20 15:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.18 [info     ] FQE_20220420154314: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.000146005527082696, 'time_algorithm_update': 0.004925103072660515, 'loss': 0.0035009557312545196, 'time_step': 0.0051346485873302785, 'init_value': -0.8632011413574219, 'ave_value': -0.6663671515479281, 'soft_opc': nan} step=830




2022-04-20 15:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.19 [info     ] FQE_20220420154314: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014864680278732116, 'time_algorithm_update': 0.004983759788145502, 'loss': 0.0034053781713605917, 'time_step': 0.005196670451796198, 'init_value': -0.8937277793884277, 'ave_value': -0.6812117796626177, 'soft_opc': nan} step=996




2022-04-20 15:43.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.20 [info     ] FQE_20220420154314: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016183738248894014, 'time_algorithm_update': 0.005164542830134013, 'loss': 0.0033707519893601805, 'time_step': 0.005397812429680882, 'init_value': -0.9903817772865295, 'ave_value': -0.7340332990831083, 'soft_opc': nan} step=1162




2022-04-20 15:43.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.21 [info     ] FQE_20220420154314: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016565207975456514, 'time_algorithm_update': 0.005174931273402938, 'loss': 0.00338445687021342, 'time_step': 0.005416409078850804, 'init_value': -1.0504831075668335, 'ave_value': -0.7566479666510949, 'soft_opc': nan} step=1328




2022-04-20 15:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.22 [info     ] FQE_20220420154314: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016292175614690207, 'time_algorithm_update': 0.005033747259392796, 'loss': 0.0033918611911484546, 'time_step': 0.005270485418388642, 'init_value': -1.070554256439209, 'ave_value': -0.7484192760141046, 'soft_opc': nan} step=1494




2022-04-20 15:43.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.23 [info     ] FQE_20220420154314: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001583831856049687, 'time_algorithm_update': 0.004548352884959026, 'loss': 0.003380175416935684, 'time_step': 0.004773178732538798, 'init_value': -1.1428706645965576, 'ave_value': -0.778762020580135, 'soft_opc': nan} step=1660




2022-04-20 15:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.24 [info     ] FQE_20220420154314: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016403485493487622, 'time_algorithm_update': 0.005004729133054435, 'loss': 0.0034535377404192485, 'time_step': 0.005246283060096833, 'init_value': -1.26795494556427, 'ave_value': -0.8689133856344867, 'soft_opc': nan} step=1826




2022-04-20 15:43.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.25 [info     ] FQE_20220420154314: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016113648931664158, 'time_algorithm_update': 0.005039788154234369, 'loss': 0.003498712069977703, 'time_step': 0.005273817533470062, 'init_value': -1.3232133388519287, 'ave_value': -0.8886492398206716, 'soft_opc': nan} step=1992




2022-04-20 15:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.26 [info     ] FQE_20220420154314: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016114941562514706, 'time_algorithm_update': 0.00509580215775823, 'loss': 0.0037306441311129785, 'time_step': 0.005330391677029161, 'init_value': -1.4413418769836426, 'ave_value': -0.9720376512220313, 'soft_opc': nan} step=2158




2022-04-20 15:43.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.27 [info     ] FQE_20220420154314: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016452030963208302, 'time_algorithm_update': 0.005055686077439642, 'loss': 0.003688807254208319, 'time_step': 0.005293996937303658, 'init_value': -1.5219321250915527, 'ave_value': -1.0292341739140651, 'soft_opc': nan} step=2324




2022-04-20 15:43.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.28 [info     ] FQE_20220420154314: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016352929264666085, 'time_algorithm_update': 0.004976045654480715, 'loss': 0.0037828637644682214, 'time_step': 0.005213566573269396, 'init_value': -1.549439549446106, 'ave_value': -1.027375923902602, 'soft_opc': nan} step=2490




2022-04-20 15:43.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.29 [info     ] FQE_20220420154314: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016022446643875306, 'time_algorithm_update': 0.004973997552710843, 'loss': 0.00402306068509099, 'time_step': 0.005205217614231339, 'init_value': -1.6293613910675049, 'ave_value': -1.0713345912796957, 'soft_opc': nan} step=2656




2022-04-20 15:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.30 [info     ] FQE_20220420154314: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016268477382430112, 'time_algorithm_update': 0.005068635366049157, 'loss': 0.004018635244293879, 'time_step': 0.005304121109376471, 'init_value': -1.7194278240203857, 'ave_value': -1.1225579692638135, 'soft_opc': nan} step=2822




2022-04-20 15:43.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.31 [info     ] FQE_20220420154314: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001567185643207596, 'time_algorithm_update': 0.0050568781703351495, 'loss': 0.004312425066369125, 'time_step': 0.00528646664447095, 'init_value': -1.8136136531829834, 'ave_value': -1.1996463894575566, 'soft_opc': nan} step=2988




2022-04-20 15:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.32 [info     ] FQE_20220420154314: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016058496682040663, 'time_algorithm_update': 0.004029084400958325, 'loss': 0.004534816036298198, 'time_step': 0.0042613902723932845, 'init_value': -1.9034340381622314, 'ave_value': -1.252187962326649, 'soft_opc': nan} step=3154




2022-04-20 15:43.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.33 [info     ] FQE_20220420154314: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016481905098421028, 'time_algorithm_update': 0.005057100790092744, 'loss': 0.004608499430434172, 'time_step': 0.005296793328710349, 'init_value': -2.00752854347229, 'ave_value': -1.3396077766007668, 'soft_opc': nan} step=3320




2022-04-20 15:43.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.34 [info     ] FQE_20220420154314: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016199824321700866, 'time_algorithm_update': 0.005085091993033168, 'loss': 0.004869865717125091, 'time_step': 0.005317389246929123, 'init_value': -2.0452985763549805, 'ave_value': -1.354206994175911, 'soft_opc': nan} step=3486




2022-04-20 15:43.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.35 [info     ] FQE_20220420154314: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001672506332397461, 'time_algorithm_update': 0.0050314808466348305, 'loss': 0.005182709663065644, 'time_step': 0.005273771573262042, 'init_value': -2.128147602081299, 'ave_value': -1.4126300354731514, 'soft_opc': nan} step=3652




2022-04-20 15:43.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.36 [info     ] FQE_20220420154314: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016119250332016544, 'time_algorithm_update': 0.005103718803589602, 'loss': 0.005243908851309286, 'time_step': 0.005340968269899667, 'init_value': -2.2371530532836914, 'ave_value': -1.4919713673022417, 'soft_opc': nan} step=3818




2022-04-20 15:43.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.37 [info     ] FQE_20220420154314: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016182732869343585, 'time_algorithm_update': 0.005074432097285627, 'loss': 0.0054614397904734656, 'time_step': 0.005309132208307105, 'init_value': -2.21359920501709, 'ave_value': -1.4701389703933183, 'soft_opc': nan} step=3984




2022-04-20 15:43.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.38 [info     ] FQE_20220420154314: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001642014606889472, 'time_algorithm_update': 0.005141640283975257, 'loss': 0.005930139955640939, 'time_step': 0.005378387060510107, 'init_value': -2.329385757446289, 'ave_value': -1.5421358013609507, 'soft_opc': nan} step=4150




2022-04-20 15:43.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.38 [info     ] FQE_20220420154314: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001601957413087408, 'time_algorithm_update': 0.005133265472320189, 'loss': 0.0061599358948542324, 'time_step': 0.005366879773427205, 'init_value': -2.455026626586914, 'ave_value': -1.6475604914464392, 'soft_opc': nan} step=4316




2022-04-20 15:43.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.39 [info     ] FQE_20220420154314: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001642416758709643, 'time_algorithm_update': 0.00506408674171172, 'loss': 0.0064391358205010685, 'time_step': 0.0053024004740887375, 'init_value': -2.5129361152648926, 'ave_value': -1.6863134553236467, 'soft_opc': nan} step=4482




2022-04-20 15:43.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.40 [info     ] FQE_20220420154314: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015730599322950984, 'time_algorithm_update': 0.004134731120373829, 'loss': 0.006487895159544535, 'time_step': 0.00436117419277329, 'init_value': -2.578390598297119, 'ave_value': -1.7124336893896799, 'soft_opc': nan} step=4648




2022-04-20 15:43.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.41 [info     ] FQE_20220420154314: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016328800155455806, 'time_algorithm_update': 0.005075441785605557, 'loss': 0.00692974072137668, 'time_step': 0.005316121032439083, 'init_value': -2.6567986011505127, 'ave_value': -1.7718954342039857, 'soft_opc': nan} step=4814




2022-04-20 15:43.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.42 [info     ] FQE_20220420154314: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001615300235978092, 'time_algorithm_update': 0.005023783948048052, 'loss': 0.00708385997105401, 'time_step': 0.0052587713103696525, 'init_value': -2.7457613945007324, 'ave_value': -1.8305293157159745, 'soft_opc': nan} step=4980




2022-04-20 15:43.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.43 [info     ] FQE_20220420154314: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016259141715176134, 'time_algorithm_update': 0.0050736148673367785, 'loss': 0.0076121143391496135, 'time_step': 0.005309083375586085, 'init_value': -2.829730987548828, 'ave_value': -1.8852928298013705, 'soft_opc': nan} step=5146




2022-04-20 15:43.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.44 [info     ] FQE_20220420154314: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016394580703183828, 'time_algorithm_update': 0.005039999283939959, 'loss': 0.007672473527028237, 'time_step': 0.005278071725224874, 'init_value': -2.870650291442871, 'ave_value': -1.9165688391070108, 'soft_opc': nan} step=5312




2022-04-20 15:43.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.45 [info     ] FQE_20220420154314: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016076593513948372, 'time_algorithm_update': 0.005031246736825231, 'loss': 0.008256604074828708, 'time_step': 0.005268194589270167, 'init_value': -2.995220422744751, 'ave_value': -2.009038877124722, 'soft_opc': nan} step=5478




2022-04-20 15:43.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.46 [info     ] FQE_20220420154314: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016227544072162673, 'time_algorithm_update': 0.004986557615808694, 'loss': 0.008215026796026806, 'time_step': 0.005222780158720821, 'init_value': -3.052201509475708, 'ave_value': -2.024326985967052, 'soft_opc': nan} step=5644




2022-04-20 15:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.47 [info     ] FQE_20220420154314: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001604987914303699, 'time_algorithm_update': 0.005053458443607192, 'loss': 0.008765579171826592, 'time_step': 0.005285468446203025, 'init_value': -3.1738905906677246, 'ave_value': -2.127631624929003, 'soft_opc': nan} step=5810




2022-04-20 15:43.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.48 [info     ] FQE_20220420154314: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016463090138263013, 'time_algorithm_update': 0.005105460982724845, 'loss': 0.009393454141527728, 'time_step': 0.005342730556625918, 'init_value': -3.1527647972106934, 'ave_value': -2.0777470228505566, 'soft_opc': nan} step=5976




2022-04-20 15:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.49 [info     ] FQE_20220420154314: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001587724111166345, 'time_algorithm_update': 0.004142532865685153, 'loss': 0.009623737792563007, 'time_step': 0.004372760473963726, 'init_value': -3.2782044410705566, 'ave_value': -2.170095045367877, 'soft_opc': nan} step=6142




2022-04-20 15:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.50 [info     ] FQE_20220420154314: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001656693148325725, 'time_algorithm_update': 0.004990043410335679, 'loss': 0.00941204503059376, 'time_step': 0.005229142775018531, 'init_value': -3.3567118644714355, 'ave_value': -2.231642910435393, 'soft_opc': nan} step=6308




2022-04-20 15:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.51 [info     ] FQE_20220420154314: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016350056751664863, 'time_algorithm_update': 0.00504680857600936, 'loss': 0.01012588743189159, 'time_step': 0.0052864135029804275, 'init_value': -3.426668405532837, 'ave_value': -2.2740184658282514, 'soft_opc': nan} step=6474




2022-04-20 15:43.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.52 [info     ] FQE_20220420154314: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016424311212746493, 'time_algorithm_update': 0.00506542533277029, 'loss': 0.010610745111354122, 'time_step': 0.005302838532321425, 'init_value': -3.5261945724487305, 'ave_value': -2.3405576392873986, 'soft_opc': nan} step=6640




2022-04-20 15:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.53 [info     ] FQE_20220420154314: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016174546207290097, 'time_algorithm_update': 0.005098222249961761, 'loss': 0.010724914191832137, 'time_step': 0.005332997046321271, 'init_value': -3.632113456726074, 'ave_value': -2.422162679012294, 'soft_opc': nan} step=6806




2022-04-20 15:43.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.54 [info     ] FQE_20220420154314: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001640090023178652, 'time_algorithm_update': 0.005188671939344291, 'loss': 0.011894589634009087, 'time_step': 0.0054295307182403935, 'init_value': -3.5712432861328125, 'ave_value': -2.332223484819537, 'soft_opc': nan} step=6972




2022-04-20 15:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.55 [info     ] FQE_20220420154314: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016823016017316337, 'time_algorithm_update': 0.005155082208564483, 'loss': 0.011779328577461686, 'time_step': 0.005400101822542857, 'init_value': -3.6545798778533936, 'ave_value': -2.4020418709328584, 'soft_opc': nan} step=7138




2022-04-20 15:43.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.56 [info     ] FQE_20220420154314: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016440971788153592, 'time_algorithm_update': 0.005079434578677258, 'loss': 0.011752598942818502, 'time_step': 0.00531712497573301, 'init_value': -3.707895278930664, 'ave_value': -2.4390343129903345, 'soft_opc': nan} step=7304




2022-04-20 15:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.57 [info     ] FQE_20220420154314: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016114797936864645, 'time_algorithm_update': 0.005124560321669981, 'loss': 0.012290277939110276, 'time_step': 0.0053630909287785905, 'init_value': -3.8180348873138428, 'ave_value': -2.5103693841276944, 'soft_opc': nan} step=7470




2022-04-20 15:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.57 [info     ] FQE_20220420154314: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015955373465296733, 'time_algorithm_update': 0.0044299277914575785, 'loss': 0.01151438150169182, 'time_step': 0.004659914108644049, 'init_value': -3.861184597015381, 'ave_value': -2.5423115113565515, 'soft_opc': nan} step=7636




2022-04-20 15:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.58 [info     ] FQE_20220420154314: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016359966921519083, 'time_algorithm_update': 0.005061566111553146, 'loss': 0.012863453691867349, 'time_step': 0.005298248256545469, 'init_value': -3.935236930847168, 'ave_value': -2.5721115447916425, 'soft_opc': nan} step=7802




2022-04-20 15:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:43.59 [info     ] FQE_20220420154314: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016457632363560688, 'time_algorithm_update': 0.005017401224159333, 'loss': 0.01329484295327194, 'time_step': 0.0052566226706447375, 'init_value': -4.006877422332764, 'ave_value': -2.6059008989046824, 'soft_opc': nan} step=7968




2022-04-20 15:43.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:44.00 [info     ] FQE_20220420154314: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016723770693124058, 'time_algorithm_update': 0.005166190216340214, 'loss': 0.013721069017024037, 'time_step': 0.005409925817007042, 'init_value': -4.082123756408691, 'ave_value': -2.6437350069617365, 'soft_opc': nan} step=8134




2022-04-20 15:44.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:44.01 [info     ] FQE_20220420154314: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016976839088531862, 'time_algorithm_update': 0.005286384777850415, 'loss': 0.014319077784936112, 'time_step': 0.005530980696161109, 'init_value': -4.154075622558594, 'ave_value': -2.691166459359564, 'soft_opc': nan} step=8300




2022-04-20 15:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154314/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:44.02 [info     ] Directory is created at d3rlpy_logs/FQE_20220420154402
2022-04-20 15:44.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:44.02 [debug    ] Building models...
2022-04-20 15:44.02 [debug    ] Models have been built.
2022-04-20 15:44.02 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420154402/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:44.04 [info     ] FQE_20220420154402: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001665076544118482, 'time_algorithm_update': 0.005109150742375573, 'loss': 0.028134811019828152, 'time_step': 0.005350983420083689, 'init_value': -1.1804890632629395, 'ave_value': -1.1366968378256839, 'soft_opc': nan} step=344




2022-04-20 15:44.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.06 [info     ] FQE_20220420154402: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001649752605793088, 'time_algorithm_update': 0.005120395920997442, 'loss': 0.02412702155916757, 'time_step': 0.0053596053012581754, 'init_value': -2.0350708961486816, 'ave_value': -1.9534175805260872, 'soft_opc': nan} step=688




2022-04-20 15:44.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.07 [info     ] FQE_20220420154402: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016259038171102835, 'time_algorithm_update': 0.004602349081704783, 'loss': 0.030461939831459246, 'time_step': 0.004839378040890361, 'init_value': -3.110450267791748, 'ave_value': -2.9512492481820485, 'soft_opc': nan} step=1032




2022-04-20 15:44.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.09 [info     ] FQE_20220420154402: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017060720643331838, 'time_algorithm_update': 0.005074944607047147, 'loss': 0.03667738470573758, 'time_step': 0.005321964275005252, 'init_value': -3.8481788635253906, 'ave_value': -3.675717016999916, 'soft_opc': nan} step=1376




2022-04-20 15:44.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.11 [info     ] FQE_20220420154402: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001651478368182515, 'time_algorithm_update': 0.005008939393731051, 'loss': 0.04856264416323325, 'time_step': 0.005251494951026384, 'init_value': -4.676809310913086, 'ave_value': -4.498415041951513, 'soft_opc': nan} step=1720




2022-04-20 15:44.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.13 [info     ] FQE_20220420154402: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016850302385729412, 'time_algorithm_update': 0.005088896945465443, 'loss': 0.06001815395050704, 'time_step': 0.005333510942237322, 'init_value': -5.443544387817383, 'ave_value': -5.330147561477917, 'soft_opc': nan} step=2064




2022-04-20 15:44.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.15 [info     ] FQE_20220420154402: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016801786977191304, 'time_algorithm_update': 0.004656454158383746, 'loss': 0.0764093274293944, 'time_step': 0.0048976526703945426, 'init_value': -6.171072483062744, 'ave_value': -6.140301751615564, 'soft_opc': nan} step=2408




2022-04-20 15:44.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.17 [info     ] FQE_20220420154402: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016909075337787007, 'time_algorithm_update': 0.005099995191707168, 'loss': 0.09559749773443611, 'time_step': 0.005348224972569665, 'init_value': -6.7253737449646, 'ave_value': -6.8503570680472965, 'soft_opc': nan} step=2752




2022-04-20 15:44.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.19 [info     ] FQE_20220420154402: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016761172649472258, 'time_algorithm_update': 0.005090543697046679, 'loss': 0.11302220185698811, 'time_step': 0.005330751801646033, 'init_value': -7.256694793701172, 'ave_value': -7.532103295040711, 'soft_opc': nan} step=3096




2022-04-20 15:44.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.21 [info     ] FQE_20220420154402: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016869639241418174, 'time_algorithm_update': 0.005080255658127541, 'loss': 0.142588343660802, 'time_step': 0.005324968764948291, 'init_value': -8.057579040527344, 'ave_value': -8.580746374765898, 'soft_opc': nan} step=3440




2022-04-20 15:44.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.23 [info     ] FQE_20220420154402: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016558655472688897, 'time_algorithm_update': 0.005082820044007412, 'loss': 0.17184647205177433, 'time_step': 0.005321363377016644, 'init_value': -8.479308128356934, 'ave_value': -9.262329551441645, 'soft_opc': nan} step=3784




2022-04-20 15:44.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.25 [info     ] FQE_20220420154402: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016888768173927484, 'time_algorithm_update': 0.004671837008276651, 'loss': 0.19979656767100096, 'time_step': 0.00491694170375203, 'init_value': -9.077876091003418, 'ave_value': -10.17186560357904, 'soft_opc': nan} step=4128




2022-04-20 15:44.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.27 [info     ] FQE_20220420154402: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016734835713408714, 'time_algorithm_update': 0.005107117253680562, 'loss': 0.23298985740646375, 'time_step': 0.005352883837943853, 'init_value': -9.305940628051758, 'ave_value': -10.673472116423167, 'soft_opc': nan} step=4472




2022-04-20 15:44.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.29 [info     ] FQE_20220420154402: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016642171283100926, 'time_algorithm_update': 0.005080187736555587, 'loss': 0.2648392036600515, 'time_step': 0.005320709112078645, 'init_value': -9.607551574707031, 'ave_value': -11.377582164041211, 'soft_opc': nan} step=4816




2022-04-20 15:44.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.31 [info     ] FQE_20220420154402: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016813153444334518, 'time_algorithm_update': 0.005084264417027318, 'loss': 0.301855550212569, 'time_step': 0.005328475042831066, 'init_value': -9.885435104370117, 'ave_value': -12.010682959059203, 'soft_opc': nan} step=5160




2022-04-20 15:44.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.33 [info     ] FQE_20220420154402: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016714112703190295, 'time_algorithm_update': 0.0046303591062856276, 'loss': 0.3397072901039623, 'time_step': 0.0048731856567915096, 'init_value': -10.546468734741211, 'ave_value': -13.088803026874062, 'soft_opc': nan} step=5504




2022-04-20 15:44.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.35 [info     ] FQE_20220420154402: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016988294069157091, 'time_algorithm_update': 0.005148558422576549, 'loss': 0.39021570184592935, 'time_step': 0.005395478287408518, 'init_value': -10.868520736694336, 'ave_value': -13.756508623303946, 'soft_opc': nan} step=5848




2022-04-20 15:44.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.37 [info     ] FQE_20220420154402: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016922521036724712, 'time_algorithm_update': 0.005053118217823117, 'loss': 0.42204499983250404, 'time_step': 0.00529928540074548, 'init_value': -11.160120964050293, 'ave_value': -14.325850040595643, 'soft_opc': nan} step=6192




2022-04-20 15:44.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.39 [info     ] FQE_20220420154402: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017015046851579532, 'time_algorithm_update': 0.005124575869981633, 'loss': 0.4661582179454177, 'time_step': 0.005374071903006975, 'init_value': -11.604984283447266, 'ave_value': -14.938376729010796, 'soft_opc': nan} step=6536




2022-04-20 15:44.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.41 [info     ] FQE_20220420154402: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016779192658357843, 'time_algorithm_update': 0.005077109087345212, 'loss': 0.5003825934105661, 'time_step': 0.005318555721016817, 'init_value': -12.09284782409668, 'ave_value': -15.640695931145771, 'soft_opc': nan} step=6880




2022-04-20 15:44.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.43 [info     ] FQE_20220420154402: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016928758732108183, 'time_algorithm_update': 0.0047896365786707674, 'loss': 0.5418162533233679, 'time_step': 0.005033838887547337, 'init_value': -12.586709976196289, 'ave_value': -16.044126517509806, 'soft_opc': nan} step=7224




2022-04-20 15:44.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.45 [info     ] FQE_20220420154402: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016920926959015602, 'time_algorithm_update': 0.005083838174509448, 'loss': 0.5774947243504399, 'time_step': 0.005330308925273807, 'init_value': -13.15056037902832, 'ave_value': -16.82371230853327, 'soft_opc': nan} step=7568




2022-04-20 15:44.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.47 [info     ] FQE_20220420154402: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016916629879973655, 'time_algorithm_update': 0.0051188392694606335, 'loss': 0.6146078973813632, 'time_step': 0.00536487338154815, 'init_value': -13.669471740722656, 'ave_value': -17.266681306016306, 'soft_opc': nan} step=7912




2022-04-20 15:44.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.49 [info     ] FQE_20220420154402: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017269128976866256, 'time_algorithm_update': 0.005057122818259306, 'loss': 0.6483717384033425, 'time_step': 0.005306784496750943, 'init_value': -13.693981170654297, 'ave_value': -17.23764005865931, 'soft_opc': nan} step=8256




2022-04-20 15:44.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.50 [info     ] FQE_20220420154402: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016514437143192736, 'time_algorithm_update': 0.004655761081118917, 'loss': 0.6775969775112043, 'time_step': 0.0048957952233247976, 'init_value': -14.570022583007812, 'ave_value': -17.974734299076797, 'soft_opc': nan} step=8600




2022-04-20 15:44.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.52 [info     ] FQE_20220420154402: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017059681027434593, 'time_algorithm_update': 0.005088792290798453, 'loss': 0.6927039333467566, 'time_step': 0.005338388126949931, 'init_value': -15.004064559936523, 'ave_value': -18.36250578214473, 'soft_opc': nan} step=8944




2022-04-20 15:44.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.54 [info     ] FQE_20220420154402: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017100226047427156, 'time_algorithm_update': 0.005024543335271436, 'loss': 0.7111584562375102, 'time_step': 0.005272620639135671, 'init_value': -15.846776962280273, 'ave_value': -19.011248818980444, 'soft_opc': nan} step=9288




2022-04-20 15:44.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.56 [info     ] FQE_20220420154402: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017179999240609102, 'time_algorithm_update': 0.005032586496929789, 'loss': 0.740294692398937, 'time_step': 0.005279877158098443, 'init_value': -16.58123779296875, 'ave_value': -19.78852111557834, 'soft_opc': nan} step=9632




2022-04-20 15:44.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:44.58 [info     ] FQE_20220420154402: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017204464868057605, 'time_algorithm_update': 0.005040557578552601, 'loss': 0.7582588732610781, 'time_step': 0.005289949649988219, 'init_value': -17.088848114013672, 'ave_value': -20.13748903205442, 'soft_opc': nan} step=9976




2022-04-20 15:44.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.00 [info     ] FQE_20220420154402: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016983858374662177, 'time_algorithm_update': 0.004808125800864641, 'loss': 0.7791159503638397, 'time_step': 0.005054956951806712, 'init_value': -17.294776916503906, 'ave_value': -20.312529607979698, 'soft_opc': nan} step=10320




2022-04-20 15:45.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.02 [info     ] FQE_20220420154402: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017167246618936228, 'time_algorithm_update': 0.005169379156689311, 'loss': 0.7825661034561521, 'time_step': 0.005416583183199861, 'init_value': -17.83246612548828, 'ave_value': -20.796751928937763, 'soft_opc': nan} step=10664




2022-04-20 15:45.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.04 [info     ] FQE_20220420154402: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001692820427029632, 'time_algorithm_update': 0.005074704109236251, 'loss': 0.7875422120180934, 'time_step': 0.005319106717442357, 'init_value': -17.936866760253906, 'ave_value': -20.753622641265896, 'soft_opc': nan} step=11008




2022-04-20 15:45.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.06 [info     ] FQE_20220420154402: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001685924308244572, 'time_algorithm_update': 0.005051703647125599, 'loss': 0.7987446169650485, 'time_step': 0.005297301813613537, 'init_value': -18.651615142822266, 'ave_value': -21.374271750417414, 'soft_opc': nan} step=11352




2022-04-20 15:45.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.08 [info     ] FQE_20220420154402: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017014215158861736, 'time_algorithm_update': 0.004635900259017944, 'loss': 0.7930721941209117, 'time_step': 0.004881093668383222, 'init_value': -19.29465103149414, 'ave_value': -21.934604444752683, 'soft_opc': nan} step=11696




2022-04-20 15:45.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.10 [info     ] FQE_20220420154402: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017064809799194336, 'time_algorithm_update': 0.005115305268487265, 'loss': 0.7967078300669442, 'time_step': 0.005362245925637179, 'init_value': -19.383907318115234, 'ave_value': -21.876712700382278, 'soft_opc': nan} step=12040




2022-04-20 15:45.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.12 [info     ] FQE_20220420154402: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017116790594056596, 'time_algorithm_update': 0.005143082419107127, 'loss': 0.7794227274676222, 'time_step': 0.0053930532100588775, 'init_value': -19.729454040527344, 'ave_value': -22.18027463337671, 'soft_opc': nan} step=12384




2022-04-20 15:45.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.14 [info     ] FQE_20220420154402: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017198712326759515, 'time_algorithm_update': 0.005118047775224198, 'loss': 0.7645169407519143, 'time_step': 0.005365514478018117, 'init_value': -19.979366302490234, 'ave_value': -22.472035652108758, 'soft_opc': nan} step=12728




2022-04-20 15:45.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.16 [info     ] FQE_20220420154402: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.000173493180164071, 'time_algorithm_update': 0.004985747642295305, 'loss': 0.7471162616166958, 'time_step': 0.00523505030676376, 'init_value': -19.911052703857422, 'ave_value': -22.49794270253346, 'soft_opc': nan} step=13072




2022-04-20 15:45.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.18 [info     ] FQE_20220420154402: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016875391782716264, 'time_algorithm_update': 0.004915943672490674, 'loss': 0.7395339413805929, 'time_step': 0.005161635404409364, 'init_value': -20.022098541259766, 'ave_value': -22.436696051707205, 'soft_opc': nan} step=13416




2022-04-20 15:45.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.20 [info     ] FQE_20220420154402: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017043670942617017, 'time_algorithm_update': 0.005118554414704789, 'loss': 0.7287925949948298, 'time_step': 0.005364878233089004, 'init_value': -20.46548080444336, 'ave_value': -22.994095896908107, 'soft_opc': nan} step=13760




2022-04-20 15:45.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.22 [info     ] FQE_20220420154402: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001716911792755127, 'time_algorithm_update': 0.005144367384356122, 'loss': 0.7201894898465726, 'time_step': 0.005393103111621945, 'init_value': -20.868492126464844, 'ave_value': -23.288659052442558, 'soft_opc': nan} step=14104




2022-04-20 15:45.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.24 [info     ] FQE_20220420154402: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017105562742366346, 'time_algorithm_update': 0.005197622748308404, 'loss': 0.7092425129376352, 'time_step': 0.005444508652354396, 'init_value': -20.865013122558594, 'ave_value': -23.288854489115543, 'soft_opc': nan} step=14448




2022-04-20 15:45.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.26 [info     ] FQE_20220420154402: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016719380090403002, 'time_algorithm_update': 0.004610272340996321, 'loss': 0.6914374773542202, 'time_step': 0.00485029608704323, 'init_value': -21.034969329833984, 'ave_value': -23.503144588691878, 'soft_opc': nan} step=14792




2022-04-20 15:45.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.28 [info     ] FQE_20220420154402: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017559528350830078, 'time_algorithm_update': 0.005086858605229577, 'loss': 0.671816197180644, 'time_step': 0.005340647558833278, 'init_value': -20.9954776763916, 'ave_value': -23.347692550987063, 'soft_opc': nan} step=15136




2022-04-20 15:45.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.30 [info     ] FQE_20220420154402: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016973878062048623, 'time_algorithm_update': 0.005039056373196979, 'loss': 0.6579679415106426, 'time_step': 0.005282555901727011, 'init_value': -21.33468246459961, 'ave_value': -23.635625878039345, 'soft_opc': nan} step=15480




2022-04-20 15:45.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.32 [info     ] FQE_20220420154402: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017196563787238542, 'time_algorithm_update': 0.005074512126833894, 'loss': 0.6521034680068666, 'time_step': 0.005322935276253279, 'init_value': -21.609905242919922, 'ave_value': -23.86866379461549, 'soft_opc': nan} step=15824




2022-04-20 15:45.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.33 [info     ] FQE_20220420154402: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016849678616191066, 'time_algorithm_update': 0.004838896352191304, 'loss': 0.6359325280348056, 'time_step': 0.00508497066276018, 'init_value': -21.747146606445312, 'ave_value': -23.91525097365852, 'soft_opc': nan} step=16168




2022-04-20 15:45.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.35 [info     ] FQE_20220420154402: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017258039740628974, 'time_algorithm_update': 0.005090188148409822, 'loss': 0.6141329227491866, 'time_step': 0.00534057478572047, 'init_value': -21.77197265625, 'ave_value': -24.16231809160819, 'soft_opc': nan} step=16512




2022-04-20 15:45.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.37 [info     ] FQE_20220420154402: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017272802286369857, 'time_algorithm_update': 0.005052242861237637, 'loss': 0.6027717131601517, 'time_step': 0.005299738673276679, 'init_value': -21.692920684814453, 'ave_value': -24.211785548023443, 'soft_opc': nan} step=16856




2022-04-20 15:45.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:45.39 [info     ] FQE_20220420154402: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016978452371996502, 'time_algorithm_update': 0.005113604456879372, 'loss': 0.5701315974773362, 'time_step': 0.005358814500099005, 'init_value': -21.7081241607666, 'ave_value': -24.259856667388064, 'soft_opc': nan} step=17200




2022-04-20 15:45.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154402/model_17200.pt
search iteration:  4
using hyper params:  [0.0029273671954283325, 0.0018091868341954683, 5.141959053824242e-05, 7]
2022-04-20 15:45.39 [debug    ] RoundIterator is selected.
2022-04-20 15:45.39 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420154539
2022-04-20 15:45.39 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:45.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:45.40 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:45.40 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0029273671954

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.43 [info     ] TD3PlusBC_20220420154539: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00039305463868972155, 'time_algorithm_update': 0.008449904402794197, 'critic_loss': 18.598088229608816, 'actor_loss': 2.6978895678157695, 'time_step': 0.008921872105514794, 'td_error': 1.0647435605369762, 'init_value': -11.277771949768066, 'ave_value': -7.193621791126039} step=342
2022-04-20 15:45.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.47 [info     ] TD3PlusBC_20220420154539: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003962516784667969, 'time_algorithm_update': 0.008982029574656348, 'critic_loss': 5.684068979924185, 'actor_loss': 2.5795344163102714, 'time_step': 0.009453172572174965, 'td_error': 1.334770197981062, 'init_value': -16.039087295532227, 'ave_value': -10.26134061068867} step=684
2022-04-20 15:45.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.51 [info     ] TD3PlusBC_20220420154539: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003979136372170253, 'time_algorithm_update': 0.008986112667106048, 'critic_loss': 8.625405913905093, 'actor_loss': 2.5695718575639335, 'time_step': 0.009461354791072378, 'td_error': 1.7617092352629744, 'init_value': -21.119037628173828, 'ave_value': -13.62300255908614} step=1026
2022-04-20 15:45.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.54 [info     ] TD3PlusBC_20220420154539: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00039568142584192823, 'time_algorithm_update': 0.008561238210800796, 'critic_loss': 12.142289685924151, 'actor_loss': 2.5636672457756355, 'time_step': 0.009029847836633872, 'td_error': 2.264232645028893, 'init_value': -25.990360260009766, 'ave_value': -16.830383188961292} step=1368
2022-04-20 15:45.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.58 [info     ] TD3PlusBC_20220420154539: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003968170511792278, 'time_algorithm_update': 0.008850466438204224, 'critic_loss': 16.074992914645993, 'actor_loss': 2.561353788041232, 'time_step': 0.009323888354831271, 'td_error': 2.8815347892891365, 'init_value': -30.78009605407715, 'ave_value': -19.981274514820107} step=1710
2022-04-20 15:45.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.02 [info     ] TD3PlusBC_20220420154539: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00039682262822201376, 'time_algorithm_update': 0.008463451039721395, 'critic_loss': 20.43398542292634, 'actor_loss': 2.560236424730535, 'time_step': 0.008932536805582326, 'td_error': 3.582106432828394, 'init_value': -35.15159606933594, 'ave_value': -22.855671265898525} step=2052
2022-04-20 15:46.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.05 [info     ] TD3PlusBC_20220420154539: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00040181756716722633, 'time_algorithm_update': 0.008928886631078888, 'critic_loss': 25.28735625953005, 'actor_loss': 2.5590166278749877, 'time_step': 0.009403826897604424, 'td_error': 4.296303849704752, 'init_value': -39.76568603515625, 'ave_value': -26.010435403152997} step=2394
2022-04-20 15:46.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.09 [info     ] TD3PlusBC_20220420154539: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0004023306551035385, 'time_algorithm_update': 0.009071085188123915, 'critic_loss': 30.560013670670358, 'actor_loss': 2.559259293372171, 'time_step': 0.009549112347831503, 'td_error': 4.9252143960175845, 'init_value': -43.59089279174805, 'ave_value': -28.561827180322076} step=2736
2022-04-20 15:46.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.13 [info     ] TD3PlusBC_20220420154539: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00039956025909959223, 'time_algorithm_update': 0.008503152613054243, 'critic_loss': 36.204487136930055, 'actor_loss': 2.558748451589841, 'time_step': 0.008971384394238566, 'td_error': 5.608470830445113, 'init_value': -47.30729675292969, 'ave_value': -31.197776884246398} step=3078
2022-04-20 15:46.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.17 [info     ] TD3PlusBC_20220420154539: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00040222190276921146, 'time_algorithm_update': 0.008919285054792437, 'critic_loss': 41.87752248529802, 'actor_loss': 2.558871311053895, 'time_step': 0.009396202382985611, 'td_error': 6.283963453891802, 'init_value': -50.9373779296875, 'ave_value': -33.72095282839183} step=3420
2022-04-20 15:46.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.20 [info     ] TD3PlusBC_20220420154539: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003951578809503923, 'time_algorithm_update': 0.008422132123980606, 'critic_loss': 47.891947288959344, 'actor_loss': 2.5593835108461436, 'time_step': 0.008893149638036538, 'td_error': 6.877189600398594, 'init_value': -53.76068115234375, 'ave_value': -35.98565776639131} step=3762
2022-04-20 15:46.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.24 [info     ] TD3PlusBC_20220420154539: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00040048883672346146, 'time_algorithm_update': 0.009065328285708064, 'critic_loss': 53.96706605097007, 'actor_loss': 2.558590433053803, 'time_step': 0.009540024556611714, 'td_error': 7.466787806712476, 'init_value': -57.11201095581055, 'ave_value': -38.184952299550076} step=4104
2022-04-20 15:46.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.28 [info     ] TD3PlusBC_20220420154539: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039837304611652216, 'time_algorithm_update': 0.008988772219384622, 'critic_loss': 59.81189790246082, 'actor_loss': 2.5597516876912256, 'time_step': 0.009463298390483299, 'td_error': 8.10455286618274, 'init_value': -60.28255081176758, 'ave_value': -40.49856868250723} step=4446
2022-04-20 15:46.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.31 [info     ] TD3PlusBC_20220420154539: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003983305211652789, 'time_algorithm_update': 0.008502597697297035, 'critic_loss': 65.7557699303878, 'actor_loss': 2.560265412804676, 'time_step': 0.008978118673402664, 'td_error': 8.446475413682688, 'init_value': -62.243141174316406, 'ave_value': -42.129853269510676} step=4788
2022-04-20 15:46.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.35 [info     ] TD3PlusBC_20220420154539: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00040128565671151143, 'time_algorithm_update': 0.008926284940619218, 'critic_loss': 71.88998476664226, 'actor_loss': 2.5602500592058863, 'time_step': 0.009402001113222357, 'td_error': 9.091686347215688, 'init_value': -65.40985107421875, 'ave_value': -44.17504635062555} step=5130
2022-04-20 15:46.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.39 [info     ] TD3PlusBC_20220420154539: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00039735872145981816, 'time_algorithm_update': 0.008429385068123801, 'critic_loss': 78.16218182078579, 'actor_loss': 2.560206473222253, 'time_step': 0.008903281730518006, 'td_error': 9.574232644223697, 'init_value': -67.9832534790039, 'ave_value': -45.88439141837041} step=5472
2022-04-20 15:46.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.42 [info     ] TD3PlusBC_20220420154539: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003989551499573111, 'time_algorithm_update': 0.008965833842405799, 'critic_loss': 84.2605546315511, 'actor_loss': 2.559573279486762, 'time_step': 0.009443362553914389, 'td_error': 9.764123118360688, 'init_value': -69.3000717163086, 'ave_value': -47.406965065050265} step=5814
2022-04-20 15:46.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.46 [info     ] TD3PlusBC_20220420154539: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0004022546678955792, 'time_algorithm_update': 0.009030294000056753, 'critic_loss': 90.43908657943993, 'actor_loss': 2.5597676938040212, 'time_step': 0.009509246251736467, 'td_error': 10.067425614282293, 'init_value': -71.68946838378906, 'ave_value': -48.90065692087031} step=6156
2022-04-20 15:46.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.50 [info     ] TD3PlusBC_20220420154539: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0004018517265542906, 'time_algorithm_update': 0.008432934158726743, 'critic_loss': 96.39390257227491, 'actor_loss': 2.5599914126925998, 'time_step': 0.008910154738621405, 'td_error': 10.513655635677782, 'init_value': -74.06024169921875, 'ave_value': -50.745694391284104} step=6498
2022-04-20 15:46.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.53 [info     ] TD3PlusBC_20220420154539: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0004029531925045259, 'time_algorithm_update': 0.008932661591914663, 'critic_loss': 102.38862034312466, 'actor_loss': 2.558761934090776, 'time_step': 0.00940052110549302, 'td_error': 10.85532552243133, 'init_value': -75.1090316772461, 'ave_value': -51.79919860475235} step=6840
2022-04-20 15:46.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.57 [info     ] TD3PlusBC_20220420154539: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00039903531994735984, 'time_algorithm_update': 0.008559670364647581, 'critic_loss': 107.98581539399443, 'actor_loss': 2.5586924343778374, 'time_step': 0.009020794901931495, 'td_error': 11.182334749737581, 'init_value': -77.2746353149414, 'ave_value': -53.302988805857716} step=7182
2022-04-20 15:46.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.01 [info     ] TD3PlusBC_20220420154539: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003959937402379443, 'time_algorithm_update': 0.009000343886035227, 'critic_loss': 113.11603885226779, 'actor_loss': 2.5588164594438343, 'time_step': 0.009454754360935144, 'td_error': 11.39044702456243, 'init_value': -78.66835021972656, 'ave_value': -54.5226203546131} step=7524
2022-04-20 15:47.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.05 [info     ] TD3PlusBC_20220420154539: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039838908011453195, 'time_algorithm_update': 0.00897753447817083, 'critic_loss': 118.18759037040131, 'actor_loss': 2.5592844332867895, 'time_step': 0.009433495370965255, 'td_error': 11.844676512855402, 'init_value': -81.22457122802734, 'ave_value': -56.13518811842678} step=7866
2022-04-20 15:47.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.08 [info     ] TD3PlusBC_20220420154539: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00040117063020405014, 'time_algorithm_update': 0.008453294547677737, 'critic_loss': 122.96450105187489, 'actor_loss': 2.5590972077776812, 'time_step': 0.008915830076786509, 'td_error': 11.807407186541951, 'init_value': -80.61607360839844, 'ave_value': -56.613591601574825} step=8208
2022-04-20 15:47.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.12 [info     ] TD3PlusBC_20220420154539: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00040349486278511626, 'time_algorithm_update': 0.008933848107767385, 'critic_loss': 127.51284395184433, 'actor_loss': 2.5597174878706013, 'time_step': 0.009396985957497045, 'td_error': 12.371061003275253, 'init_value': -83.04655456542969, 'ave_value': -57.885701545632635} step=8550
2022-04-20 15:47.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.15 [info     ] TD3PlusBC_20220420154539: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003980328465065761, 'time_algorithm_update': 0.008474260742901361, 'critic_loss': 132.13505688048247, 'actor_loss': 2.5597279504028676, 'time_step': 0.008929168968869928, 'td_error': 12.074054458383864, 'init_value': -83.10514831542969, 'ave_value': -58.90768333428212} step=8892
2022-04-20 15:47.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.19 [info     ] TD3PlusBC_20220420154539: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0004021256987811529, 'time_algorithm_update': 0.008949895351253755, 'critic_loss': 136.56215406719008, 'actor_loss': 2.5595342387929993, 'time_step': 0.009418051842360468, 'td_error': 13.075851645572515, 'init_value': -85.27153015136719, 'ave_value': -60.000019235704094} step=9234
2022-04-20 15:47.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.23 [info     ] TD3PlusBC_20220420154539: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.000400506962112516, 'time_algorithm_update': 0.008981901999802618, 'critic_loss': 140.3247239631519, 'actor_loss': 2.5593591001298694, 'time_step': 0.00944597539845963, 'td_error': 12.978299122130279, 'init_value': -85.52669525146484, 'ave_value': -60.59496690259676} step=9576
2022-04-20 15:47.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.27 [info     ] TD3PlusBC_20220420154539: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003977365661085698, 'time_algorithm_update': 0.008586065113893029, 'critic_loss': 144.35451152868438, 'actor_loss': 2.5610199671739724, 'time_step': 0.009049299864741097, 'td_error': 13.06573397334808, 'init_value': -86.16260528564453, 'ave_value': -61.60667709595348} step=9918
2022-04-20 15:47.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.30 [info     ] TD3PlusBC_20220420154539: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00039969201673541155, 'time_algorithm_update': 0.008992513020833334, 'critic_loss': 148.1973191199944, 'actor_loss': 2.5612529774158324, 'time_step': 0.009458936445894297, 'td_error': 13.402569096921168, 'init_value': -86.48077392578125, 'ave_value': -62.30674447363553} step=10260
2022-04-20 15:47.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.34 [info     ] TD3PlusBC_20220420154539: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003994466268528275, 'time_algorithm_update': 0.008470722806383993, 'critic_loss': 151.66783543636924, 'actor_loss': 2.560850971623471, 'time_step': 0.008931550366139551, 'td_error': 13.935273887723625, 'init_value': -88.63789367675781, 'ave_value': -63.6368334422816} step=10602
2022-04-20 15:47.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.38 [info     ] TD3PlusBC_20220420154539: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0004016502558836463, 'time_algorithm_update': 0.009035463221589027, 'critic_loss': 155.12943765294483, 'actor_loss': 2.5611485096446254, 'time_step': 0.009497179622538605, 'td_error': 13.583401127640176, 'init_value': -87.24931335449219, 'ave_value': -63.811405984325724} step=10944
2022-04-20 15:47.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.41 [info     ] TD3PlusBC_20220420154539: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00039639319592749166, 'time_algorithm_update': 0.008896851400185746, 'critic_loss': 158.72271942674067, 'actor_loss': 2.561964581584373, 'time_step': 0.009353949312578169, 'td_error': 14.165326526510112, 'init_value': -90.0617904663086, 'ave_value': -64.68575526157159} step=11286
2022-04-20 15:47.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.45 [info     ] TD3PlusBC_20220420154539: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00040147179051449423, 'time_algorithm_update': 0.008565043845371894, 'critic_loss': 161.57983951680146, 'actor_loss': 2.561897701687283, 'time_step': 0.009036531922412895, 'td_error': 14.289923778889792, 'init_value': -88.3048324584961, 'ave_value': -65.11019541417895} step=11628
2022-04-20 15:47.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.49 [info     ] TD3PlusBC_20220420154539: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00039559289028770044, 'time_algorithm_update': 0.00894875833165576, 'critic_loss': 164.41552040590878, 'actor_loss': 2.562451005679125, 'time_step': 0.009409443676820275, 'td_error': 14.054197769860487, 'init_value': -89.21883392333984, 'ave_value': -66.01360983708504} step=11970
2022-04-20 15:47.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.52 [info     ] TD3PlusBC_20220420154539: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000401459939298574, 'time_algorithm_update': 0.008578066937407555, 'critic_loss': 167.2924208613167, 'actor_loss': 2.562920043342992, 'time_step': 0.009043073793600875, 'td_error': 14.40202382144955, 'init_value': -89.10499572753906, 'ave_value': -66.19392850975296} step=12312
2022-04-20 15:47.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.56 [info     ] TD3PlusBC_20220420154539: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003977937307971263, 'time_algorithm_update': 0.00895511267478006, 'critic_loss': 169.7749586941903, 'actor_loss': 2.562824943609405, 'time_step': 0.009417604981807241, 'td_error': 14.76670227514969, 'init_value': -87.93386840820312, 'ave_value': -66.67198598527794} step=12654
2022-04-20 15:47.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.00 [info     ] TD3PlusBC_20220420154539: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00038936403062608506, 'time_algorithm_update': 0.008705413132383112, 'critic_loss': 172.2522715741431, 'actor_loss': 2.562861470451132, 'time_step': 0.009157893950479072, 'td_error': 14.662331111214035, 'init_value': -88.26348876953125, 'ave_value': -67.1743455383445} step=12996
2022-04-20 15:48.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.03 [info     ] TD3PlusBC_20220420154539: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035667489146628574, 'time_algorithm_update': 0.00816614586010314, 'critic_loss': 174.58140664351615, 'actor_loss': 2.5639900282809607, 'time_step': 0.008577672361630446, 'td_error': 15.114994021263668, 'init_value': -90.29368591308594, 'ave_value': -68.00647496469922} step=13338
2022-04-20 15:48.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.07 [info     ] TD3PlusBC_20220420154539: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00038517915714553924, 'time_algorithm_update': 0.008705180290846796, 'critic_loss': 176.8537470053511, 'actor_loss': 2.5633817025792527, 'time_step': 0.00915107810706423, 'td_error': 15.148545822699335, 'init_value': -89.08045196533203, 'ave_value': -67.93627669604369} step=13680
2022-04-20 15:48.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.11 [info     ] TD3PlusBC_20220420154539: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0004070404677363167, 'time_algorithm_update': 0.008523519973308719, 'critic_loss': 179.13833122922662, 'actor_loss': 2.563949307503059, 'time_step': 0.008994883961147733, 'td_error': 14.830264271683092, 'init_value': -88.80009460449219, 'ave_value': -68.2235516284595} step=14022
2022-04-20 15:48.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.14 [info     ] TD3PlusBC_20220420154539: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0004086710556208739, 'time_algorithm_update': 0.008960813109637701, 'critic_loss': 180.83982726426152, 'actor_loss': 2.5641856068059017, 'time_step': 0.009433228370041876, 'td_error': 14.977130466994957, 'init_value': -87.43669128417969, 'ave_value': -68.17045118862461} step=14364
2022-04-20 15:48.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.18 [info     ] TD3PlusBC_20220420154539: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000401507344162255, 'time_algorithm_update': 0.00898827934822841, 'critic_loss': 182.66609894601922, 'actor_loss': 2.5649795755308276, 'time_step': 0.009461052933631585, 'td_error': 15.357046620261741, 'init_value': -88.82122039794922, 'ave_value': -68.92580794729963} step=14706
2022-04-20 15:48.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.22 [info     ] TD3PlusBC_20220420154539: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0004029517982438294, 'time_algorithm_update': 0.00852371098702414, 'critic_loss': 184.01005911129957, 'actor_loss': 2.564604237762808, 'time_step': 0.00898670941068415, 'td_error': 15.859900963950475, 'init_value': -88.81071472167969, 'ave_value': -69.46587589671869} step=15048
2022-04-20 15:48.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.25 [info     ] TD3PlusBC_20220420154539: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00040304591084084317, 'time_algorithm_update': 0.008902618759556821, 'critic_loss': 185.92447818510715, 'actor_loss': 2.565013343130636, 'time_step': 0.009374289484749063, 'td_error': 16.362449129278616, 'init_value': -89.40191650390625, 'ave_value': -69.89200511585351} step=15390
2022-04-20 15:48.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.29 [info     ] TD3PlusBC_20220420154539: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00040582536953931664, 'time_algorithm_update': 0.008521768781873915, 'critic_loss': 187.25416924102962, 'actor_loss': 2.5653190069031298, 'time_step': 0.00899260992195174, 'td_error': 16.384125456518415, 'init_value': -87.68128204345703, 'ave_value': -69.84050550457702} step=15732
2022-04-20 15:48.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.33 [info     ] TD3PlusBC_20220420154539: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004046841671592311, 'time_algorithm_update': 0.00902698402516326, 'critic_loss': 188.54566134625708, 'actor_loss': 2.56587903402005, 'time_step': 0.00949680317215055, 'td_error': 16.950195182479654, 'init_value': -90.23387145996094, 'ave_value': -70.49109133694019} step=16074
2022-04-20 15:48.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.37 [info     ] TD3PlusBC_20220420154539: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00040900707244873047, 'time_algorithm_update': 0.008967467915942098, 'critic_loss': 189.94388535705923, 'actor_loss': 2.5650741217429176, 'time_step': 0.0094446327254089, 'td_error': 16.717849972862876, 'init_value': -88.06376647949219, 'ave_value': -70.32286219388011} step=16416
2022-04-20 15:48.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.40 [info     ] TD3PlusBC_20220420154539: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00040218565199110244, 'time_algorithm_update': 0.008514570911028231, 'critic_loss': 190.83261878030342, 'actor_loss': 2.5651816527048745, 'time_step': 0.008984852255436411, 'td_error': 16.465017852973865, 'init_value': -86.98148345947266, 'ave_value': -70.43072356463435} step=16758
2022-04-20 15:48.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.44 [info     ] TD3PlusBC_20220420154539: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00039910851863392613, 'time_algorithm_update': 0.009006420771280924, 'critic_loss': 191.6339924684045, 'actor_loss': 2.5650461040742214, 'time_step': 0.009475561610439368, 'td_error': 16.823198078569916, 'init_value': -88.05357360839844, 'ave_value': -70.77086567109149} step=17100
2022-04-20 15:48.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420154539/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:48.45 [info     ] FQE_20220420154844: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016059502061591092, 'time_algorithm_update': 0.004997447312596333, 'loss': 0.005592960642695606, 'time_step': 0.005229388374880135, 'init_value': -0.43404287099838257, 'ave_value': -0.3964527798564853, 'soft_opc': nan} step=166




2022-04-20 15:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.46 [info     ] FQE_20220420154844: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016211601625005883, 'time_algorithm_update': 0.005047535321798669, 'loss': 0.004112679526737207, 'time_step': 0.005285850490432188, 'init_value': -0.5402873754501343, 'ave_value': -0.47051024251357393, 'soft_opc': nan} step=332




2022-04-20 15:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.47 [info     ] FQE_20220420154844: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015737924231104105, 'time_algorithm_update': 0.004108947443674846, 'loss': 0.00390461668041142, 'time_step': 0.0043359406023140415, 'init_value': -0.6065391302108765, 'ave_value': -0.5137099376669875, 'soft_opc': nan} step=498




2022-04-20 15:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.48 [info     ] FQE_20220420154844: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001640707613473915, 'time_algorithm_update': 0.005035661789308111, 'loss': 0.004002368562933372, 'time_step': 0.005274555769311376, 'init_value': -0.7177464962005615, 'ave_value': -0.5921705755061126, 'soft_opc': nan} step=664




2022-04-20 15:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.49 [info     ] FQE_20220420154844: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016701221466064453, 'time_algorithm_update': 0.0050337788570358095, 'loss': 0.00400983231108501, 'time_step': 0.005271107317453407, 'init_value': -0.7906450033187866, 'ave_value': -0.6363385959117263, 'soft_opc': nan} step=830




2022-04-20 15:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.50 [info     ] FQE_20220420154844: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016416699053293252, 'time_algorithm_update': 0.004973201866609505, 'loss': 0.003957158870467669, 'time_step': 0.0052085856357252745, 'init_value': -0.8200794458389282, 'ave_value': -0.6497808426954188, 'soft_opc': nan} step=996




2022-04-20 15:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.51 [info     ] FQE_20220420154844: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016270200890230844, 'time_algorithm_update': 0.005001592348857099, 'loss': 0.003936038665327591, 'time_step': 0.00523834199790495, 'init_value': -0.898808479309082, 'ave_value': -0.7027279229918578, 'soft_opc': nan} step=1162




2022-04-20 15:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.52 [info     ] FQE_20220420154844: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016250380550522403, 'time_algorithm_update': 0.004993944282991341, 'loss': 0.003804506003963256, 'time_step': 0.005230107939386943, 'init_value': -0.9574946165084839, 'ave_value': -0.7359717813899388, 'soft_opc': nan} step=1328




2022-04-20 15:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.53 [info     ] FQE_20220420154844: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001671903104667204, 'time_algorithm_update': 0.005064305052699813, 'loss': 0.003834114731177239, 'time_step': 0.005304850727678782, 'init_value': -1.014658808708191, 'ave_value': -0.7827355422885032, 'soft_opc': nan} step=1494




2022-04-20 15:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.54 [info     ] FQE_20220420154844: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001626718475157956, 'time_algorithm_update': 0.004993777677237269, 'loss': 0.0037925257445705764, 'time_step': 0.005228199154497629, 'init_value': -1.083040475845337, 'ave_value': -0.8247962124041609, 'soft_opc': nan} step=1660




2022-04-20 15:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.55 [info     ] FQE_20220420154844: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016218208404908697, 'time_algorithm_update': 0.00503941472754421, 'loss': 0.003817761358272015, 'time_step': 0.005275954683142972, 'init_value': -1.1887164115905762, 'ave_value': -0.8976241469383239, 'soft_opc': nan} step=1826




2022-04-20 15:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.55 [info     ] FQE_20220420154844: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016248369791421546, 'time_algorithm_update': 0.00432812162192471, 'loss': 0.0038207652108990645, 'time_step': 0.004559604518384819, 'init_value': -1.2041418552398682, 'ave_value': -0.9029171288013458, 'soft_opc': nan} step=1992




2022-04-20 15:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.56 [info     ] FQE_20220420154844: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001686854534838573, 'time_algorithm_update': 0.004973194685327001, 'loss': 0.004011701500037663, 'time_step': 0.005217934229287757, 'init_value': -1.2914862632751465, 'ave_value': -0.965638002303538, 'soft_opc': nan} step=2158




2022-04-20 15:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.57 [info     ] FQE_20220420154844: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016970950436879354, 'time_algorithm_update': 0.00514775299164186, 'loss': 0.0041629523108993845, 'time_step': 0.005391663815601763, 'init_value': -1.3937327861785889, 'ave_value': -1.0369501558308665, 'soft_opc': nan} step=2324




2022-04-20 15:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.58 [info     ] FQE_20220420154844: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016381510769028262, 'time_algorithm_update': 0.00502068163400673, 'loss': 0.004344421486561018, 'time_step': 0.005259196442293833, 'init_value': -1.4337369203567505, 'ave_value': -1.0593882991991064, 'soft_opc': nan} step=2490




2022-04-20 15:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:48.59 [info     ] FQE_20220420154844: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015983667718358786, 'time_algorithm_update': 0.005068415618804564, 'loss': 0.004882384262732174, 'time_step': 0.005302535482199795, 'init_value': -1.5013794898986816, 'ave_value': -1.0815326819817226, 'soft_opc': nan} step=2656




2022-04-20 15:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.00 [info     ] FQE_20220420154844: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001624291201671922, 'time_algorithm_update': 0.004993657031691218, 'loss': 0.004924545800229198, 'time_step': 0.005232551011694483, 'init_value': -1.6119892597198486, 'ave_value': -1.1766194822149234, 'soft_opc': nan} step=2822




2022-04-20 15:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.01 [info     ] FQE_20220420154844: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016460073999611727, 'time_algorithm_update': 0.005086913166275944, 'loss': 0.005112395949341941, 'time_step': 0.005327731729990028, 'init_value': -1.6063964366912842, 'ave_value': -1.1546285108738654, 'soft_opc': nan} step=2988




2022-04-20 15:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.02 [info     ] FQE_20220420154844: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016651814242443406, 'time_algorithm_update': 0.005020997610436864, 'loss': 0.005603468450240746, 'time_step': 0.0052624323281897116, 'init_value': -1.7519170045852661, 'ave_value': -1.2577129889078238, 'soft_opc': nan} step=3154




2022-04-20 15:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.03 [info     ] FQE_20220420154844: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015920616057981928, 'time_algorithm_update': 0.0050550598696053745, 'loss': 0.005767093965761274, 'time_step': 0.005287454788943371, 'init_value': -1.8451099395751953, 'ave_value': -1.312339250078878, 'soft_opc': nan} step=3320




2022-04-20 15:49.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.04 [info     ] FQE_20220420154844: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016515657126185405, 'time_algorithm_update': 0.00482637910957796, 'loss': 0.006254416180729687, 'time_step': 0.005068902509758271, 'init_value': -1.8646202087402344, 'ave_value': -1.3126469520432456, 'soft_opc': nan} step=3486




2022-04-20 15:49.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.05 [info     ] FQE_20220420154844: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001612743699407003, 'time_algorithm_update': 0.004561924072633307, 'loss': 0.006590053557659533, 'time_step': 0.004798267261091485, 'init_value': -2.0139763355255127, 'ave_value': -1.4503772974282771, 'soft_opc': nan} step=3652




2022-04-20 15:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.06 [info     ] FQE_20220420154844: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016676948731204113, 'time_algorithm_update': 0.005096842007464673, 'loss': 0.007381868131419487, 'time_step': 0.005341801298670022, 'init_value': -2.1218767166137695, 'ave_value': -1.529644516989723, 'soft_opc': nan} step=3818




2022-04-20 15:49.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.07 [info     ] FQE_20220420154844: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.000162111707480557, 'time_algorithm_update': 0.005096240215990917, 'loss': 0.007428545833015478, 'time_step': 0.005332165453807417, 'init_value': -2.151751756668091, 'ave_value': -1.5353800713277614, 'soft_opc': nan} step=3984




2022-04-20 15:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.08 [info     ] FQE_20220420154844: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.000162233789283109, 'time_algorithm_update': 0.0050205150282526595, 'loss': 0.007777252195245889, 'time_step': 0.005253501685268907, 'init_value': -2.2246644496917725, 'ave_value': -1.569684355088451, 'soft_opc': nan} step=4150




2022-04-20 15:49.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.09 [info     ] FQE_20220420154844: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016607577542224563, 'time_algorithm_update': 0.0050480624279343935, 'loss': 0.008275007935667253, 'time_step': 0.005289742745548846, 'init_value': -2.2902333736419678, 'ave_value': -1.594199664004751, 'soft_opc': nan} step=4316




2022-04-20 15:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.10 [info     ] FQE_20220420154844: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016463233763913074, 'time_algorithm_update': 0.005009474524532456, 'loss': 0.008730732946634203, 'time_step': 0.005245539079229516, 'init_value': -2.339322090148926, 'ave_value': -1.6275055744760745, 'soft_opc': nan} step=4482




2022-04-20 15:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.11 [info     ] FQE_20220420154844: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001647601644676852, 'time_algorithm_update': 0.0051067579223448975, 'loss': 0.009636813633744213, 'time_step': 0.0053471599716738046, 'init_value': -2.456383466720581, 'ave_value': -1.7058222180148503, 'soft_opc': nan} step=4648




2022-04-20 15:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.12 [info     ] FQE_20220420154844: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016121691968067582, 'time_algorithm_update': 0.004988013979900314, 'loss': 0.010046551613479358, 'time_step': 0.005223390567733581, 'init_value': -2.621516704559326, 'ave_value': -1.8327804896477107, 'soft_opc': nan} step=4814




2022-04-20 15:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.13 [info     ] FQE_20220420154844: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.000167287975908762, 'time_algorithm_update': 0.005090890160526137, 'loss': 0.010281986381622952, 'time_step': 0.005331533500947148, 'init_value': -2.6942903995513916, 'ave_value': -1.8868919403166384, 'soft_opc': nan} step=4980




2022-04-20 15:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.13 [info     ] FQE_20220420154844: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.000161893396492464, 'time_algorithm_update': 0.004245035619620818, 'loss': 0.011572277562619826, 'time_step': 0.004485143236367099, 'init_value': -2.7802562713623047, 'ave_value': -1.954378368188669, 'soft_opc': nan} step=5146




2022-04-20 15:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.14 [info     ] FQE_20220420154844: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016226538692612247, 'time_algorithm_update': 0.004984049911958626, 'loss': 0.011998439720321268, 'time_step': 0.0052229036767798736, 'init_value': -2.9180519580841064, 'ave_value': -2.0388763650781936, 'soft_opc': nan} step=5312




2022-04-20 15:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.15 [info     ] FQE_20220420154844: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016486213867922863, 'time_algorithm_update': 0.004946652665195695, 'loss': 0.012783836326505765, 'time_step': 0.005187093493450119, 'init_value': -2.9332714080810547, 'ave_value': -2.0324386515260278, 'soft_opc': nan} step=5478




2022-04-20 15:49.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.16 [info     ] FQE_20220420154844: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001637059521962361, 'time_algorithm_update': 0.005083234913377877, 'loss': 0.013042527260763728, 'time_step': 0.005323776279587343, 'init_value': -3.067342519760132, 'ave_value': -2.109238911138193, 'soft_opc': nan} step=5644




2022-04-20 15:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.17 [info     ] FQE_20220420154844: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016408512391239763, 'time_algorithm_update': 0.005052751805408892, 'loss': 0.013577540782392475, 'time_step': 0.005291496414736092, 'init_value': -3.1670022010803223, 'ave_value': -2.19717826702976, 'soft_opc': nan} step=5810




2022-04-20 15:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.18 [info     ] FQE_20220420154844: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016146251954228045, 'time_algorithm_update': 0.004961028156510319, 'loss': 0.014632248384906092, 'time_step': 0.0051983896508274305, 'init_value': -3.2581887245178223, 'ave_value': -2.2672167618129704, 'soft_opc': nan} step=5976




2022-04-20 15:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.19 [info     ] FQE_20220420154844: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016365281070571347, 'time_algorithm_update': 0.005090396088289927, 'loss': 0.015044068764048034, 'time_step': 0.005328472838344344, 'init_value': -3.34721302986145, 'ave_value': -2.340582600544702, 'soft_opc': nan} step=6142




2022-04-20 15:49.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.20 [info     ] FQE_20220420154844: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016481761472770967, 'time_algorithm_update': 0.005012397306511201, 'loss': 0.015615291649430811, 'time_step': 0.005251242453793445, 'init_value': -3.3679451942443848, 'ave_value': -2.3489930477362497, 'soft_opc': nan} step=6308




2022-04-20 15:49.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.21 [info     ] FQE_20220420154844: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016678672239004848, 'time_algorithm_update': 0.005068369658596544, 'loss': 0.016347164387747377, 'time_step': 0.005306235278945371, 'init_value': -3.467015504837036, 'ave_value': -2.407719400390848, 'soft_opc': nan} step=6474




2022-04-20 15:49.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.22 [info     ] FQE_20220420154844: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016342875469161803, 'time_algorithm_update': 0.004158762564142066, 'loss': 0.017178051684231282, 'time_step': 0.004396735903728439, 'init_value': -3.4621379375457764, 'ave_value': -2.404131784820342, 'soft_opc': nan} step=6640




2022-04-20 15:49.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.23 [info     ] FQE_20220420154844: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016209734491555086, 'time_algorithm_update': 0.005133245364729181, 'loss': 0.015741913634105528, 'time_step': 0.005370639892945807, 'init_value': -3.6279592514038086, 'ave_value': -2.5138697511038264, 'soft_opc': nan} step=6806




2022-04-20 15:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.24 [info     ] FQE_20220420154844: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016552281666951007, 'time_algorithm_update': 0.005072000515030091, 'loss': 0.01811732616870249, 'time_step': 0.005314986389803599, 'init_value': -3.654578447341919, 'ave_value': -2.5159750794236726, 'soft_opc': nan} step=6972




2022-04-20 15:49.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.25 [info     ] FQE_20220420154844: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001649698579167745, 'time_algorithm_update': 0.005118658743708967, 'loss': 0.019737505710246152, 'time_step': 0.005360132240387331, 'init_value': -3.811476230621338, 'ave_value': -2.683530665477654, 'soft_opc': nan} step=7138




2022-04-20 15:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.26 [info     ] FQE_20220420154844: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016444275178104998, 'time_algorithm_update': 0.005085627716707896, 'loss': 0.02022381185228566, 'time_step': 0.005326296909745917, 'init_value': -3.8637514114379883, 'ave_value': -2.729649172709869, 'soft_opc': nan} step=7304




2022-04-20 15:49.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.27 [info     ] FQE_20220420154844: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016879173646490258, 'time_algorithm_update': 0.0050482362149709675, 'loss': 0.021025061704841138, 'time_step': 0.005291658711720662, 'init_value': -3.8988208770751953, 'ave_value': -2.744798260860078, 'soft_opc': nan} step=7470




2022-04-20 15:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.28 [info     ] FQE_20220420154844: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016742729278932135, 'time_algorithm_update': 0.005044231931847262, 'loss': 0.02186311271773786, 'time_step': 0.005288161427141672, 'init_value': -4.036889553070068, 'ave_value': -2.833461013060432, 'soft_opc': nan} step=7636




2022-04-20 15:49.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.29 [info     ] FQE_20220420154844: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016736553375979504, 'time_algorithm_update': 0.0051360489374183745, 'loss': 0.02274454764654321, 'time_step': 0.005380844495382653, 'init_value': -4.0767316818237305, 'ave_value': -2.860821462818631, 'soft_opc': nan} step=7802




2022-04-20 15:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.30 [info     ] FQE_20220420154844: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001608851444290345, 'time_algorithm_update': 0.005024302436644773, 'loss': 0.02285117025386138, 'time_step': 0.005260520670787397, 'init_value': -4.101585388183594, 'ave_value': -2.8361714625412278, 'soft_opc': nan} step=7968




2022-04-20 15:49.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.30 [info     ] FQE_20220420154844: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016060938318091702, 'time_algorithm_update': 0.004089714532875153, 'loss': 0.02401828063660335, 'time_step': 0.004325672804591167, 'init_value': -4.28203821182251, 'ave_value': -2.9577688717224575, 'soft_opc': nan} step=8134




2022-04-20 15:49.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:49.31 [info     ] FQE_20220420154844: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001661720046077866, 'time_algorithm_update': 0.005128261554672058, 'loss': 0.02447693002079221, 'time_step': 0.005366608320948589, 'init_value': -4.328437805175781, 'ave_value': -2.9634411924191424, 'soft_opc': nan} step=8300




2022-04-20 15:49.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154844/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:49.32 [debug    ] RoundIterator is selected.
2022-04-20 15:49.32 [info     ] Directory is created at d3rlpy_logs/FQE_20220420154932
2022-04-20 15:49.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:49.32 [debug    ] Building models...
2022-04-20 15:49.32 [debug    ] Models have been built.
2022-04-20 15:49.32 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420154932/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:49.34 [info     ] FQE_20220420154932: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001691420410954675, 'time_algorithm_update': 0.005077899195427118, 'loss': 0.030854644521875956, 'time_step': 0.005321028620697731, 'init_value': -1.1258952617645264, 'ave_value': -1.0816007020975555, 'soft_opc': nan} step=344




2022-04-20 15:49.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.36 [info     ] FQE_20220420154932: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001668070637902548, 'time_algorithm_update': 0.00499419556107632, 'loss': 0.025900519915943054, 'time_step': 0.005236410817434621, 'init_value': -1.7336714267730713, 'ave_value': -1.6244594465706264, 'soft_opc': nan} step=688




2022-04-20 15:49.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.38 [info     ] FQE_20220420154932: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017016848852468091, 'time_algorithm_update': 0.005076832549516545, 'loss': 0.028029756669258308, 'time_step': 0.00532270725383315, 'init_value': -2.5433807373046875, 'ave_value': -2.4057160310178727, 'soft_opc': nan} step=1032




2022-04-20 15:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.40 [info     ] FQE_20220420154932: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017054621563401332, 'time_algorithm_update': 0.0045505630415539405, 'loss': 0.03057288585317343, 'time_step': 0.004792940477992213, 'init_value': -2.9390106201171875, 'ave_value': -2.833411567087646, 'soft_opc': nan} step=1376




2022-04-20 15:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.42 [info     ] FQE_20220420154932: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017928314763446186, 'time_algorithm_update': 0.005048932031143543, 'loss': 0.03985329835708145, 'time_step': 0.005304136941599292, 'init_value': -3.3547675609588623, 'ave_value': -3.3347383883253143, 'soft_opc': nan} step=1720




2022-04-20 15:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.44 [info     ] FQE_20220420154932: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016936590505200764, 'time_algorithm_update': 0.005084692045699718, 'loss': 0.0494889943037442, 'time_step': 0.0053306610085243405, 'init_value': -3.753080368041992, 'ave_value': -3.8330454242719876, 'soft_opc': nan} step=2064




2022-04-20 15:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.46 [info     ] FQE_20220420154932: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001752023086991421, 'time_algorithm_update': 0.005083818768346032, 'loss': 0.061890750568425065, 'time_step': 0.0053342871887739314, 'init_value': -4.158337593078613, 'ave_value': -4.339418992406043, 'soft_opc': nan} step=2408




2022-04-20 15:49.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.48 [info     ] FQE_20220420154932: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001688246117081753, 'time_algorithm_update': 0.005001480496206949, 'loss': 0.07582533055494084, 'time_step': 0.0052444075429162315, 'init_value': -4.379555702209473, 'ave_value': -4.700115153864278, 'soft_opc': nan} step=2752




2022-04-20 15:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.49 [info     ] FQE_20220420154932: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016831450684126034, 'time_algorithm_update': 0.004565956287605818, 'loss': 0.0878615603825554, 'time_step': 0.00481045731278353, 'init_value': -4.632322311401367, 'ave_value': -5.063213963126386, 'soft_opc': nan} step=3096




2022-04-20 15:49.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.51 [info     ] FQE_20220420154932: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016727904940760412, 'time_algorithm_update': 0.00506973197293836, 'loss': 0.10684354663393351, 'time_step': 0.005312858625899914, 'init_value': -5.0370683670043945, 'ave_value': -5.688586054247518, 'soft_opc': nan} step=3440




2022-04-20 15:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.53 [info     ] FQE_20220420154932: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001704394817352295, 'time_algorithm_update': 0.0050562821155370665, 'loss': 0.11904840272049917, 'time_step': 0.00530167305192282, 'init_value': -5.343334197998047, 'ave_value': -6.136807991193423, 'soft_opc': nan} step=3784




2022-04-20 15:49.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.55 [info     ] FQE_20220420154932: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017076592112696447, 'time_algorithm_update': 0.005036629216615544, 'loss': 0.13478062128604845, 'time_step': 0.005284503448841183, 'init_value': -5.838176727294922, 'ave_value': -6.7946201072372325, 'soft_opc': nan} step=4128




2022-04-20 15:49.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.57 [info     ] FQE_20220420154932: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.000166443198226219, 'time_algorithm_update': 0.004529303589532542, 'loss': 0.14585441614089664, 'time_step': 0.004767039487528247, 'init_value': -5.812816143035889, 'ave_value': -6.998672047205403, 'soft_opc': nan} step=4472




2022-04-20 15:49.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:49.59 [info     ] FQE_20220420154932: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017053997793862986, 'time_algorithm_update': 0.005058478477389314, 'loss': 0.15707463680138423, 'time_step': 0.005303900602251985, 'init_value': -6.394589900970459, 'ave_value': -7.869754731276719, 'soft_opc': nan} step=4816




2022-04-20 15:49.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.01 [info     ] FQE_20220420154932: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016866173855094023, 'time_algorithm_update': 0.005063628041467001, 'loss': 0.1695947669565591, 'time_step': 0.0053052957667860876, 'init_value': -6.566028594970703, 'ave_value': -8.268468073226284, 'soft_opc': nan} step=5160




2022-04-20 15:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.03 [info     ] FQE_20220420154932: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001704720563666765, 'time_algorithm_update': 0.005034864641899286, 'loss': 0.1822800031960617, 'time_step': 0.005279518144075261, 'init_value': -7.147031307220459, 'ave_value': -9.034120182340402, 'soft_opc': nan} step=5504




2022-04-20 15:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.05 [info     ] FQE_20220420154932: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016851688540259072, 'time_algorithm_update': 0.005125540633534276, 'loss': 0.19669352845440424, 'time_step': 0.005367240933484809, 'init_value': -7.080715179443359, 'ave_value': -9.266903744051065, 'soft_opc': nan} step=5848




2022-04-20 15:50.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.06 [info     ] FQE_20220420154932: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016328068666679915, 'time_algorithm_update': 0.0033595014450161957, 'loss': 0.20869819815595483, 'time_step': 0.0035953293013018233, 'init_value': -7.256499767303467, 'ave_value': -9.673690179759545, 'soft_opc': nan} step=6192




2022-04-20 15:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.08 [info     ] FQE_20220420154932: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001647957535677178, 'time_algorithm_update': 0.0035471195398375046, 'loss': 0.22520836027404076, 'time_step': 0.003786594368690668, 'init_value': -7.375729084014893, 'ave_value': -10.104481687166452, 'soft_opc': nan} step=6536




2022-04-20 15:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.09 [info     ] FQE_20220420154932: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016337078671122706, 'time_algorithm_update': 0.0035032144812650458, 'loss': 0.24126832041521232, 'time_step': 0.0037426033685373705, 'init_value': -7.534963607788086, 'ave_value': -10.496722531432562, 'soft_opc': nan} step=6880




2022-04-20 15:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.11 [info     ] FQE_20220420154932: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001658776471781176, 'time_algorithm_update': 0.0035023155600525614, 'loss': 0.24802429922098337, 'time_step': 0.0037391144175862156, 'init_value': -7.613146781921387, 'ave_value': -10.909379298965916, 'soft_opc': nan} step=7224




2022-04-20 15:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.12 [info     ] FQE_20220420154932: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016622557196506235, 'time_algorithm_update': 0.0035454630851745605, 'loss': 0.26648765642650774, 'time_step': 0.0037829778915227847, 'init_value': -7.883658409118652, 'ave_value': -11.308331290994108, 'soft_opc': nan} step=7568




2022-04-20 15:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.13 [info     ] FQE_20220420154932: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016440693722214809, 'time_algorithm_update': 0.0035258226616438045, 'loss': 0.2826585344223002, 'time_step': 0.0037619125011355376, 'init_value': -7.971954345703125, 'ave_value': -11.614037097825223, 'soft_opc': nan} step=7912




2022-04-20 15:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.15 [info     ] FQE_20220420154932: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015907925228739894, 'time_algorithm_update': 0.0034297004688617796, 'loss': 0.30371719091957394, 'time_step': 0.0036617441232814347, 'init_value': -8.220220565795898, 'ave_value': -11.972087550861971, 'soft_opc': nan} step=8256




2022-04-20 15:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.16 [info     ] FQE_20220420154932: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016556091086809024, 'time_algorithm_update': 0.003546192202457162, 'loss': 0.3162302073567759, 'time_step': 0.003786938828091289, 'init_value': -8.325584411621094, 'ave_value': -12.262879481864044, 'soft_opc': nan} step=8600




2022-04-20 15:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.18 [info     ] FQE_20220420154932: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001640181208765784, 'time_algorithm_update': 0.0035732506319533946, 'loss': 0.3293763651447587, 'time_step': 0.0038101416687632717, 'init_value': -8.739547729492188, 'ave_value': -12.693938901358251, 'soft_opc': nan} step=8944




2022-04-20 15:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.19 [info     ] FQE_20220420154932: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016646468362142874, 'time_algorithm_update': 0.003553017627361209, 'loss': 0.34983723254354543, 'time_step': 0.0037940144538879395, 'init_value': -8.906524658203125, 'ave_value': -13.120105452811648, 'soft_opc': nan} step=9288




2022-04-20 15:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.20 [info     ] FQE_20220420154932: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016530516535736794, 'time_algorithm_update': 0.0034693209237830584, 'loss': 0.3560732927574061, 'time_step': 0.003706841274749401, 'init_value': -9.153310775756836, 'ave_value': -13.476537152108115, 'soft_opc': nan} step=9632




2022-04-20 15:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.22 [info     ] FQE_20220420154932: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016481100126754407, 'time_algorithm_update': 0.0034808835317922194, 'loss': 0.3783373590612914, 'time_step': 0.003718616657478865, 'init_value': -9.159629821777344, 'ave_value': -13.47324408307034, 'soft_opc': nan} step=9976




2022-04-20 15:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.23 [info     ] FQE_20220420154932: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016376514767491542, 'time_algorithm_update': 0.0035137541072313176, 'loss': 0.4029406435830995, 'time_step': 0.0037522399148275684, 'init_value': -9.635002136230469, 'ave_value': -13.953414992103772, 'soft_opc': nan} step=10320




2022-04-20 15:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.25 [info     ] FQE_20220420154932: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016515268835910532, 'time_algorithm_update': 0.0034607933011165885, 'loss': 0.41352069806740727, 'time_step': 0.003700159316839174, 'init_value': -10.043729782104492, 'ave_value': -14.38792589070267, 'soft_opc': nan} step=10664




2022-04-20 15:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.26 [info     ] FQE_20220420154932: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016404723012170127, 'time_algorithm_update': 0.0035893390345018965, 'loss': 0.4312569245284553, 'time_step': 0.0038267685923465463, 'init_value': -10.015881538391113, 'ave_value': -14.43449003317557, 'soft_opc': nan} step=11008




2022-04-20 15:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.28 [info     ] FQE_20220420154932: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016761935034463572, 'time_algorithm_update': 0.0035369167494219405, 'loss': 0.4501977756965992, 'time_step': 0.003777679315833158, 'init_value': -10.711637496948242, 'ave_value': -15.09905602573627, 'soft_opc': nan} step=11352




2022-04-20 15:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.29 [info     ] FQE_20220420154932: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016497040903845498, 'time_algorithm_update': 0.003554927748303081, 'loss': 0.4595140213211782, 'time_step': 0.0037909302600594454, 'init_value': -10.706184387207031, 'ave_value': -14.942881330142116, 'soft_opc': nan} step=11696




2022-04-20 15:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.30 [info     ] FQE_20220420154932: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016591507335041844, 'time_algorithm_update': 0.0035557483517846397, 'loss': 0.4651314289915527, 'time_step': 0.003795579422351926, 'init_value': -11.350749969482422, 'ave_value': -15.35463642748765, 'soft_opc': nan} step=12040




2022-04-20 15:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.32 [info     ] FQE_20220420154932: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016623666120129963, 'time_algorithm_update': 0.0035652005395223926, 'loss': 0.48191299730218773, 'time_step': 0.0038049241831136305, 'init_value': -11.819522857666016, 'ave_value': -15.624457724588117, 'soft_opc': nan} step=12384




2022-04-20 15:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.33 [info     ] FQE_20220420154932: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016501337982887445, 'time_algorithm_update': 0.0034927323807117552, 'loss': 0.4855559299440058, 'time_step': 0.00373075244038604, 'init_value': -12.06227970123291, 'ave_value': -15.632354585263409, 'soft_opc': nan} step=12728




2022-04-20 15:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.35 [info     ] FQE_20220420154932: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016603012417638026, 'time_algorithm_update': 0.003489815218504085, 'loss': 0.49759081193939025, 'time_step': 0.0037316194800443426, 'init_value': -12.549766540527344, 'ave_value': -15.711362918165957, 'soft_opc': nan} step=13072




2022-04-20 15:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.36 [info     ] FQE_20220420154932: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016217869381571925, 'time_algorithm_update': 0.0034825143425963644, 'loss': 0.5108510976072488, 'time_step': 0.0037170807982600013, 'init_value': -13.200387001037598, 'ave_value': -15.86114886527344, 'soft_opc': nan} step=13416




2022-04-20 15:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.37 [info     ] FQE_20220420154932: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001632203889447589, 'time_algorithm_update': 0.0035285353660583496, 'loss': 0.5082500142391858, 'time_step': 0.0037656918514606566, 'init_value': -13.556676864624023, 'ave_value': -16.08824424738681, 'soft_opc': nan} step=13760




2022-04-20 15:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.39 [info     ] FQE_20220420154932: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016289880109387775, 'time_algorithm_update': 0.0035114697245664374, 'loss': 0.5206814888041726, 'time_step': 0.003746960052224093, 'init_value': -14.117513656616211, 'ave_value': -16.35412783082407, 'soft_opc': nan} step=14104




2022-04-20 15:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.40 [info     ] FQE_20220420154932: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016350662985513376, 'time_algorithm_update': 0.003176593503286672, 'loss': 0.528355464068523, 'time_step': 0.0034138428610424663, 'init_value': -14.591838836669922, 'ave_value': -16.436610775127555, 'soft_opc': nan} step=14448




2022-04-20 15:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.42 [info     ] FQE_20220420154932: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001674024171607439, 'time_algorithm_update': 0.003632388835729555, 'loss': 0.5244995884077494, 'time_step': 0.003874859144521314, 'init_value': -14.875625610351562, 'ave_value': -16.424065939684372, 'soft_opc': nan} step=14792




2022-04-20 15:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.43 [info     ] FQE_20220420154932: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001642094102016715, 'time_algorithm_update': 0.0035518643467925314, 'loss': 0.5271358246005379, 'time_step': 0.0037909524385319198, 'init_value': -15.249191284179688, 'ave_value': -16.581931875034673, 'soft_opc': nan} step=15136




2022-04-20 15:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.44 [info     ] FQE_20220420154932: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016479644664498262, 'time_algorithm_update': 0.0035928806593251783, 'loss': 0.5325729413618615, 'time_step': 0.0038323326166286027, 'init_value': -15.665351867675781, 'ave_value': -16.672459632137723, 'soft_opc': nan} step=15480




2022-04-20 15:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.46 [info     ] FQE_20220420154932: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016526843226233193, 'time_algorithm_update': 0.003537945969160213, 'loss': 0.5341552727920718, 'time_step': 0.003776093555051227, 'init_value': -16.222013473510742, 'ave_value': -16.82542719219912, 'soft_opc': nan} step=15824




2022-04-20 15:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.47 [info     ] FQE_20220420154932: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016515338143637014, 'time_algorithm_update': 0.0035970883313999617, 'loss': 0.5501704972486423, 'time_step': 0.0038377871347028154, 'init_value': -16.829452514648438, 'ave_value': -17.05756808513029, 'soft_opc': nan} step=16168




2022-04-20 15:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.49 [info     ] FQE_20220420154932: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016748905181884766, 'time_algorithm_update': 0.003536460011504417, 'loss': 0.5674507029430378, 'time_step': 0.0037774630757265314, 'init_value': -17.282896041870117, 'ave_value': -17.289475237690166, 'soft_opc': nan} step=16512




2022-04-20 15:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.50 [info     ] FQE_20220420154932: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016477218894071357, 'time_algorithm_update': 0.003497498673061992, 'loss': 0.5612146271403532, 'time_step': 0.0037373124166976573, 'init_value': -17.20654296875, 'ave_value': -17.219046731205943, 'soft_opc': nan} step=16856




2022-04-20 15:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.51 [info     ] FQE_20220420154932: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016394326853197674, 'time_algorithm_update': 0.0035324276879776357, 'loss': 0.5571281089025095, 'time_step': 0.0037691648616347204, 'init_value': -17.634403228759766, 'ave_value': -17.560886990643517, 'soft_opc': nan} step=17200




2022-04-20 15:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154932/model_17200.pt
search iteration:  5
using hyper params:  [0.00763536735957856, 0.004613441760584798, 2.6745568944546886e-05, 3]
2022-04-20 15:50.51 [debug    ] RoundIterator is selected.
2022-04-20 15:50.51 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420155051
2022-04-20 15:50.51 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:50.52 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:50.52 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:50.52 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.007635367359578

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:50.55 [info     ] TD3PlusBC_20220420155051: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.000356664434511062, 'time_algorithm_update': 0.00695893639012387, 'critic_loss': 3.037686907582813, 'actor_loss': 2.4139230446508755, 'time_step': 0.007393822335360343, 'td_error': 0.8177924207892755, 'init_value': -4.3829665184021, 'ave_value': -2.4921639581840602} step=342
2022-04-20 15:50.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:50.58 [info     ] TD3PlusBC_20220420155051: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000351988781265348, 'time_algorithm_update': 0.006815886636923629, 'critic_loss': 1.300734276485722, 'actor_loss': 2.292129041158665, 'time_step': 0.0072413313458537494, 'td_error': 0.859082700901683, 'init_value': -6.323444366455078, 'ave_value': -3.5585117312545913} step=684
2022-04-20 15:50.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.00 [info     ] TD3PlusBC_20220420155051: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035462323685138544, 'time_algorithm_update': 0.006917181070785076, 'critic_loss': 1.833390837350087, 'actor_loss': 2.2836008127669842, 'time_step': 0.007349092360825566, 'td_error': 0.9266067484572672, 'init_value': -8.461655616760254, 'ave_value': -4.810701268898462} step=1026
2022-04-20 15:51.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.03 [info     ] TD3PlusBC_20220420155051: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035312022382055807, 'time_algorithm_update': 0.006868609908031441, 'critic_loss': 2.5258825083225096, 'actor_loss': 2.277868896897076, 'time_step': 0.007298532982318722, 'td_error': 1.0331236817313132, 'init_value': -10.504059791564941, 'ave_value': -5.956284031992325} step=1368
2022-04-20 15:51.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.06 [info     ] TD3PlusBC_20220420155051: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003553475552832174, 'time_algorithm_update': 0.006894571042200278, 'critic_loss': 3.3376812073919506, 'actor_loss': 2.288532102317141, 'time_step': 0.007329711440013863, 'td_error': 1.1610662806074774, 'init_value': -12.760056495666504, 'ave_value': -7.243591586339648} step=1710
2022-04-20 15:51.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.09 [info     ] TD3PlusBC_20220420155051: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003525053548534014, 'time_algorithm_update': 0.006884388756333736, 'critic_loss': 4.147690426536471, 'actor_loss': 2.2909772493685896, 'time_step': 0.007312403087727508, 'td_error': 1.3014770106460234, 'init_value': -14.994183540344238, 'ave_value': -8.461016029303114} step=2052
2022-04-20 15:51.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.12 [info     ] TD3PlusBC_20220420155051: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003542028672513906, 'time_algorithm_update': 0.006846137214125248, 'critic_loss': 5.15623744956234, 'actor_loss': 2.2937828462723404, 'time_step': 0.007279315887138858, 'td_error': 1.4839591459473351, 'init_value': -17.225906372070312, 'ave_value': -9.733794651760988} step=2394
2022-04-20 15:51.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.15 [info     ] TD3PlusBC_20220420155051: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003601082584314179, 'time_algorithm_update': 0.00694346218778376, 'critic_loss': 6.171013188640973, 'actor_loss': 2.292672756819697, 'time_step': 0.007381360433255023, 'td_error': 1.6560749500390135, 'init_value': -19.2266845703125, 'ave_value': -10.905235820612912} step=2736
2022-04-20 15:51.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.18 [info     ] TD3PlusBC_20220420155051: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003569488636931481, 'time_algorithm_update': 0.006986703789025022, 'critic_loss': 7.485611198938381, 'actor_loss': 2.295680188296134, 'time_step': 0.007419255044725206, 'td_error': 1.8202546129411454, 'init_value': -21.393156051635742, 'ave_value': -12.1940603359078} step=3078
2022-04-20 15:51.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.21 [info     ] TD3PlusBC_20220420155051: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003522620563618621, 'time_algorithm_update': 0.006827666048418011, 'critic_loss': 8.859549472206517, 'actor_loss': 2.2934637223070826, 'time_step': 0.0072570174758197275, 'td_error': 1.8980232037789961, 'init_value': -23.454418182373047, 'ave_value': -13.205123846400236} step=3420
2022-04-20 15:51.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.24 [info     ] TD3PlusBC_20220420155051: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003543220765409414, 'time_algorithm_update': 0.0069484508525558385, 'critic_loss': 10.436749829883464, 'actor_loss': 2.2916734288310447, 'time_step': 0.00737452507019043, 'td_error': 2.111363200928484, 'init_value': -25.643218994140625, 'ave_value': -14.338755888656822} step=3762
2022-04-20 15:51.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.27 [info     ] TD3PlusBC_20220420155051: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003544524399160642, 'time_algorithm_update': 0.006861053015056409, 'critic_loss': 12.112235753856904, 'actor_loss': 2.295702230163485, 'time_step': 0.007282704637761702, 'td_error': 2.324654862394939, 'init_value': -27.249109268188477, 'ave_value': -15.407971175767415} step=4104
2022-04-20 15:51.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.30 [info     ] TD3PlusBC_20220420155051: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035494252255088404, 'time_algorithm_update': 0.00694103199138976, 'critic_loss': 14.130705939398872, 'actor_loss': 2.2970259579998706, 'time_step': 0.007365551608347753, 'td_error': 2.52241851378368, 'init_value': -28.510242462158203, 'ave_value': -16.346845817706622} step=4446
2022-04-20 15:51.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.33 [info     ] TD3PlusBC_20220420155051: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003546044143319827, 'time_algorithm_update': 0.006889348141631188, 'critic_loss': 15.951659389406617, 'actor_loss': 2.2938616666180347, 'time_step': 0.007313873335631967, 'td_error': 2.7434592682312027, 'init_value': -30.961833953857422, 'ave_value': -17.591302109338294} step=4788
2022-04-20 15:51.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.36 [info     ] TD3PlusBC_20220420155051: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003585836343597948, 'time_algorithm_update': 0.006919708865427831, 'critic_loss': 18.090830730415924, 'actor_loss': 2.294395077298259, 'time_step': 0.007353414568984718, 'td_error': 2.8428754904390128, 'init_value': -32.913246154785156, 'ave_value': -18.571802649278077} step=5130
2022-04-20 15:51.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.39 [info     ] TD3PlusBC_20220420155051: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035829014248318144, 'time_algorithm_update': 0.0068576928467778435, 'critic_loss': 20.30621121501365, 'actor_loss': 2.2921321252633255, 'time_step': 0.00729027895899544, 'td_error': 3.067745165746408, 'init_value': -34.90290832519531, 'ave_value': -19.621246007700652} step=5472
2022-04-20 15:51.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.41 [info     ] TD3PlusBC_20220420155051: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00036511574572289896, 'time_algorithm_update': 0.006895625103286832, 'critic_loss': 22.335835903011567, 'actor_loss': 2.2969656645903114, 'time_step': 0.0073309084128218086, 'td_error': 3.3646103614799556, 'init_value': -37.53422164916992, 'ave_value': -20.804040844440543} step=5814
2022-04-20 15:51.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.44 [info     ] TD3PlusBC_20220420155051: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003548553812573528, 'time_algorithm_update': 0.006909677159716511, 'critic_loss': 24.778154914142096, 'actor_loss': 2.2951394187079535, 'time_step': 0.007339311622039616, 'td_error': 3.510420296017107, 'init_value': -38.80360412597656, 'ave_value': -21.686721640682972} step=6156
2022-04-20 15:51.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.47 [info     ] TD3PlusBC_20220420155051: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003527374992593687, 'time_algorithm_update': 0.006861771756445455, 'critic_loss': 27.170890559927066, 'actor_loss': 2.2981622497937835, 'time_step': 0.007286876962895979, 'td_error': 3.720774490965076, 'init_value': -39.97477340698242, 'ave_value': -22.506813570177265} step=6498
2022-04-20 15:51.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.50 [info     ] TD3PlusBC_20220420155051: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035920756602147866, 'time_algorithm_update': 0.006887168912162558, 'critic_loss': 29.51470652797766, 'actor_loss': 2.2984893294105753, 'time_step': 0.007323571813036824, 'td_error': 4.092887705928094, 'init_value': -42.11143112182617, 'ave_value': -23.4999157358607} step=6840
2022-04-20 15:51.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.53 [info     ] TD3PlusBC_20220420155051: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003576864276015968, 'time_algorithm_update': 0.006911501549837882, 'critic_loss': 31.853706164666782, 'actor_loss': 2.295925426204302, 'time_step': 0.0073469270739639015, 'td_error': 4.309298853332475, 'init_value': -44.148319244384766, 'ave_value': -24.371899310375756} step=7182
2022-04-20 15:51.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.56 [info     ] TD3PlusBC_20220420155051: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003477209492733604, 'time_algorithm_update': 0.006791022785922937, 'critic_loss': 34.365585812351156, 'actor_loss': 2.298575448711016, 'time_step': 0.0072112320459376995, 'td_error': 4.333691575387746, 'init_value': -44.294864654541016, 'ave_value': -25.048607306142532} step=7524
2022-04-20 15:51.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:51.59 [info     ] TD3PlusBC_20220420155051: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035700114846926685, 'time_algorithm_update': 0.006950201346860294, 'critic_loss': 36.42549540982609, 'actor_loss': 2.298184926049751, 'time_step': 0.007384414561310707, 'td_error': 4.525850661299127, 'init_value': -45.074302673339844, 'ave_value': -25.7431578460376} step=7866
2022-04-20 15:51.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.02 [info     ] TD3PlusBC_20220420155051: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035770803864239255, 'time_algorithm_update': 0.006876582987824379, 'critic_loss': 39.133441741006415, 'actor_loss': 2.300930398249487, 'time_step': 0.007310836635834989, 'td_error': 4.740732245056012, 'init_value': -46.55701446533203, 'ave_value': -26.651679609472957} step=8208
2022-04-20 15:52.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.06 [info     ] TD3PlusBC_20220420155051: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003600496994821649, 'time_algorithm_update': 0.008746111602113959, 'critic_loss': 41.64945672130027, 'actor_loss': 2.2965025846024005, 'time_step': 0.00917882319779424, 'td_error': 5.099634624460145, 'init_value': -48.810359954833984, 'ave_value': -27.46014803808686} step=8550
2022-04-20 15:52.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.09 [info     ] TD3PlusBC_20220420155051: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035682616875185605, 'time_algorithm_update': 0.008961661517271522, 'critic_loss': 44.01373532501577, 'actor_loss': 2.2981741609629136, 'time_step': 0.009393596509743852, 'td_error': 5.2844946394707355, 'init_value': -49.4973030090332, 'ave_value': -28.30051714209219} step=8892
2022-04-20 15:52.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.13 [info     ] TD3PlusBC_20220420155051: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035785443601552507, 'time_algorithm_update': 0.008597242901896873, 'critic_loss': 46.47369842083133, 'actor_loss': 2.3010343041336325, 'time_step': 0.009029925915232877, 'td_error': 5.619954831205774, 'init_value': -51.936012268066406, 'ave_value': -29.0828274286144} step=9234
2022-04-20 15:52.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.17 [info     ] TD3PlusBC_20220420155051: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00037607602905808833, 'time_algorithm_update': 0.008986426375762762, 'critic_loss': 49.1015354792277, 'actor_loss': 2.301015660079599, 'time_step': 0.009438416414093553, 'td_error': 5.606348693042032, 'init_value': -52.379722595214844, 'ave_value': -29.88347702468947} step=9576
2022-04-20 15:52.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.20 [info     ] TD3PlusBC_20220420155051: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00035642950158370167, 'time_algorithm_update': 0.008997319734584518, 'critic_loss': 51.36662657776771, 'actor_loss': 2.2984304567526657, 'time_step': 0.00942511516704894, 'td_error': 5.82981177402344, 'init_value': -53.2285041809082, 'ave_value': -30.44994043100206} step=9918
2022-04-20 15:52.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.24 [info     ] TD3PlusBC_20220420155051: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003603362200552957, 'time_algorithm_update': 0.008796148132859614, 'critic_loss': 53.763965707076224, 'actor_loss': 2.3001273040883023, 'time_step': 0.009228862517061289, 'td_error': 5.905218930510093, 'init_value': -54.2139778137207, 'ave_value': -31.106478086022477} step=10260
2022-04-20 15:52.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.28 [info     ] TD3PlusBC_20220420155051: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035703600498667934, 'time_algorithm_update': 0.008996808738039251, 'critic_loss': 56.11827660722342, 'actor_loss': 2.2978284791199086, 'time_step': 0.009431698866057815, 'td_error': 6.071742540604626, 'init_value': -55.566871643066406, 'ave_value': -32.001282276390036} step=10602
2022-04-20 15:52.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.31 [info     ] TD3PlusBC_20220420155051: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003627287714104903, 'time_algorithm_update': 0.008493084656564813, 'critic_loss': 58.52897003798457, 'actor_loss': 2.296897101820561, 'time_step': 0.0089340405157435, 'td_error': 6.1635148991440705, 'init_value': -55.792503356933594, 'ave_value': -32.4534175640598} step=10944
2022-04-20 15:52.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.35 [info     ] TD3PlusBC_20220420155051: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003581472307617901, 'time_algorithm_update': 0.008931141847755477, 'critic_loss': 60.85014996891133, 'actor_loss': 2.3005787155084443, 'time_step': 0.00936629897669742, 'td_error': 6.454223644378103, 'init_value': -56.73919677734375, 'ave_value': -32.96003914624948} step=11286
2022-04-20 15:52.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.39 [info     ] TD3PlusBC_20220420155051: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035606211389017386, 'time_algorithm_update': 0.008924170544272974, 'critic_loss': 63.6534055854842, 'actor_loss': 2.301758499870523, 'time_step': 0.009354923203674674, 'td_error': 6.654661379889664, 'init_value': -58.2069091796875, 'ave_value': -33.93989158429558} step=11628
2022-04-20 15:52.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.42 [info     ] TD3PlusBC_20220420155051: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003574856540613007, 'time_algorithm_update': 0.008666793505350748, 'critic_loss': 65.94893917284514, 'actor_loss': 2.3018366844333404, 'time_step': 0.009099272259494714, 'td_error': 6.757038288961368, 'init_value': -58.595848083496094, 'ave_value': -34.44839000222533} step=11970
2022-04-20 15:52.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.46 [info     ] TD3PlusBC_20220420155051: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003560809364095766, 'time_algorithm_update': 0.00896325585437797, 'critic_loss': 68.0869988000881, 'actor_loss': 2.3001766483686126, 'time_step': 0.00939394019500554, 'td_error': 7.140341532859035, 'init_value': -60.33831024169922, 'ave_value': -35.05188776174669} step=12312
2022-04-20 15:52.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.50 [info     ] TD3PlusBC_20220420155051: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035218049211111684, 'time_algorithm_update': 0.008626258861251741, 'critic_loss': 70.25345034906042, 'actor_loss': 2.3026567634783293, 'time_step': 0.009055449251543012, 'td_error': 7.036215414391209, 'init_value': -60.0964469909668, 'ave_value': -35.41779027390327} step=12654
2022-04-20 15:52.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.53 [info     ] TD3PlusBC_20220420155051: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035387730737875776, 'time_algorithm_update': 0.008917043780722814, 'critic_loss': 72.61930060804936, 'actor_loss': 2.3022952344682484, 'time_step': 0.009343369662413123, 'td_error': 7.118376037524625, 'init_value': -60.57727813720703, 'ave_value': -35.889889830458856} step=12996
2022-04-20 15:52.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.57 [info     ] TD3PlusBC_20220420155051: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003536681682742827, 'time_algorithm_update': 0.008945345181470726, 'critic_loss': 74.77861618577388, 'actor_loss': 2.305629622866536, 'time_step': 0.009373503121716238, 'td_error': 7.172314276049484, 'init_value': -60.70366287231445, 'ave_value': -36.47368595071406} step=13338
2022-04-20 15:52.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.01 [info     ] TD3PlusBC_20220420155051: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00035582439244142053, 'time_algorithm_update': 0.008600300515604299, 'critic_loss': 76.93164404930427, 'actor_loss': 2.3033736853571662, 'time_step': 0.009033926746301484, 'td_error': 7.441325817526888, 'init_value': -62.5885009765625, 'ave_value': -37.20918125323976} step=13680
2022-04-20 15:53.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.05 [info     ] TD3PlusBC_20220420155051: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003579938620851751, 'time_algorithm_update': 0.00889938337761059, 'critic_loss': 78.90708017628096, 'actor_loss': 2.305469600777877, 'time_step': 0.009333885204025179, 'td_error': 7.291740915785067, 'init_value': -62.6223258972168, 'ave_value': -37.7754127626851} step=14022
2022-04-20 15:53.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.08 [info     ] TD3PlusBC_20220420155051: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003635674192194353, 'time_algorithm_update': 0.00850395431295473, 'critic_loss': 81.09587098283377, 'actor_loss': 2.3044027877829927, 'time_step': 0.008946733865124441, 'td_error': 7.698544633845985, 'init_value': -63.03202438354492, 'ave_value': -38.179814985923656} step=14364
2022-04-20 15:53.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.12 [info     ] TD3PlusBC_20220420155051: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00036166216197766756, 'time_algorithm_update': 0.00894654703419111, 'critic_loss': 83.12333767316495, 'actor_loss': 2.304618870305736, 'time_step': 0.009383727932534022, 'td_error': 7.793797495281005, 'init_value': -63.859291076660156, 'ave_value': -38.5896177391233} step=14706
2022-04-20 15:53.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.16 [info     ] TD3PlusBC_20220420155051: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003593065585309302, 'time_algorithm_update': 0.008976870112948947, 'critic_loss': 85.4429517266346, 'actor_loss': 2.305311311755264, 'time_step': 0.009411257609986422, 'td_error': 7.881725859597798, 'init_value': -64.69022369384766, 'ave_value': -39.13790889833634} step=15048
2022-04-20 15:53.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.19 [info     ] TD3PlusBC_20220420155051: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00035472432075188176, 'time_algorithm_update': 0.008532446727418062, 'critic_loss': 87.58832631473653, 'actor_loss': 2.304339756045425, 'time_step': 0.008961021551611827, 'td_error': 8.333402314873311, 'init_value': -66.36138916015625, 'ave_value': -39.793613575295886} step=15390
2022-04-20 15:53.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.23 [info     ] TD3PlusBC_20220420155051: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003582232179697494, 'time_algorithm_update': 0.008911541330884074, 'critic_loss': 89.44326559144851, 'actor_loss': 2.3080084086858736, 'time_step': 0.009345962987308614, 'td_error': 8.012638418096557, 'init_value': -65.2818374633789, 'ave_value': -40.00501988491824} step=15732
2022-04-20 15:53.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.26 [info     ] TD3PlusBC_20220420155051: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003546713388454147, 'time_algorithm_update': 0.00850054046563935, 'critic_loss': 91.32451161167077, 'actor_loss': 2.3037835143462955, 'time_step': 0.008931113962541547, 'td_error': 8.335846690664173, 'init_value': -65.13739013671875, 'ave_value': -40.310733745660514} step=16074
2022-04-20 15:53.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.30 [info     ] TD3PlusBC_20220420155051: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00035392192372104577, 'time_algorithm_update': 0.00889226079684252, 'critic_loss': 93.56612008078056, 'actor_loss': 2.30736560012862, 'time_step': 0.009322144831830298, 'td_error': 8.50498658110547, 'init_value': -66.73454284667969, 'ave_value': -40.99754704770229} step=16416
2022-04-20 15:53.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.34 [info     ] TD3PlusBC_20220420155051: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003603543454443502, 'time_algorithm_update': 0.009027460862321464, 'critic_loss': 95.03350873579059, 'actor_loss': 2.3042389119577686, 'time_step': 0.009467518120481256, 'td_error': 8.723345545135187, 'init_value': -66.65787506103516, 'ave_value': -41.25179178024919} step=16758
2022-04-20 15:53.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.38 [info     ] TD3PlusBC_20220420155051: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00035730858295284515, 'time_algorithm_update': 0.008535453450610067, 'critic_loss': 96.92439058091905, 'actor_loss': 2.304671599851017, 'time_step': 0.008970632887723153, 'td_error': 8.607314988212794, 'init_value': -67.32221221923828, 'ave_value': -41.727914398118166} step=17100
2022-04-20 15:53.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155051/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:53.39 [info     ] FQE_20220420155338: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016607433916574502, 'time_algorithm_update': 0.005087957324751888, 'loss': 0.006125667498514326, 'time_step': 0.005328168351966214, 'init_value': -0.5861301422119141, 'ave_value': -0.5517735108061954, 'soft_opc': nan} step=166




2022-04-20 15:53.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.40 [info     ] FQE_20220420155338: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015858713402805557, 'time_algorithm_update': 0.005130293857620423, 'loss': 0.0046069800688223125, 'time_step': 0.005369101662233651, 'init_value': -0.7697806358337402, 'ave_value': -0.6598232274388408, 'soft_opc': nan} step=332




2022-04-20 15:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.41 [info     ] FQE_20220420155338: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015944314290242023, 'time_algorithm_update': 0.005004450499293316, 'loss': 0.004223897664650646, 'time_step': 0.005237501787852092, 'init_value': -0.8466185331344604, 'ave_value': -0.6770487063639873, 'soft_opc': nan} step=498




2022-04-20 15:53.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.42 [info     ] FQE_20220420155338: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014960191336022802, 'time_algorithm_update': 0.004982731428491064, 'loss': 0.004238514552251941, 'time_step': 0.005197288042091462, 'init_value': -0.9512709379196167, 'ave_value': -0.7259726031876362, 'soft_opc': nan} step=664




2022-04-20 15:53.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.43 [info     ] FQE_20220420155338: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014507770538330078, 'time_algorithm_update': 0.004953698939587696, 'loss': 0.004158808129519808, 'time_step': 0.005163870662091726, 'init_value': -1.037652850151062, 'ave_value': -0.7736926065707529, 'soft_opc': nan} step=830




2022-04-20 15:53.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.43 [info     ] FQE_20220420155338: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014556028756750636, 'time_algorithm_update': 0.00454587534249547, 'loss': 0.004081729008445898, 'time_step': 0.0047504198120301025, 'init_value': -1.076603889465332, 'ave_value': -0.7846015849427597, 'soft_opc': nan} step=996




2022-04-20 15:53.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.44 [info     ] FQE_20220420155338: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00013566735279129212, 'time_algorithm_update': 0.004344022417642984, 'loss': 0.004171618944909199, 'time_step': 0.004549944257161704, 'init_value': -1.15561044216156, 'ave_value': -0.8282548240914538, 'soft_opc': nan} step=1162




2022-04-20 15:53.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.45 [info     ] FQE_20220420155338: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014462384832910746, 'time_algorithm_update': 0.005088755883366229, 'loss': 0.004179171278809074, 'time_step': 0.005294740918170975, 'init_value': -1.219323992729187, 'ave_value': -0.8566404486763047, 'soft_opc': nan} step=1328




2022-04-20 15:53.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.46 [info     ] FQE_20220420155338: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014791862074151096, 'time_algorithm_update': 0.004911776048591338, 'loss': 0.004119612782890628, 'time_step': 0.00512958291065262, 'init_value': -1.2679152488708496, 'ave_value': -0.8823181358677847, 'soft_opc': nan} step=1494




2022-04-20 15:53.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.47 [info     ] FQE_20220420155338: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001608248216560088, 'time_algorithm_update': 0.005031219447951719, 'loss': 0.004114899430171508, 'time_step': 0.0052643296230270205, 'init_value': -1.343748688697815, 'ave_value': -0.9350617890690898, 'soft_opc': nan} step=1660




2022-04-20 15:53.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.48 [info     ] FQE_20220420155338: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016041692480983505, 'time_algorithm_update': 0.004981335387172469, 'loss': 0.004098456686766571, 'time_step': 0.005215751119406827, 'init_value': -1.3766992092132568, 'ave_value': -0.9543896928765215, 'soft_opc': nan} step=1826




2022-04-20 15:53.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.49 [info     ] FQE_20220420155338: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016356232654617494, 'time_algorithm_update': 0.00509200038680111, 'loss': 0.004203648934090308, 'time_step': 0.005330374441951154, 'init_value': -1.4448810815811157, 'ave_value': -0.9980716926289034, 'soft_opc': nan} step=1992




2022-04-20 15:53.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.50 [info     ] FQE_20220420155338: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001682718116116811, 'time_algorithm_update': 0.005147510264293257, 'loss': 0.004136108485448836, 'time_step': 0.00538926383098924, 'init_value': -1.5125784873962402, 'ave_value': -1.045704979919367, 'soft_opc': nan} step=2158




2022-04-20 15:53.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.51 [info     ] FQE_20220420155338: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016944092440317912, 'time_algorithm_update': 0.0051023873938135355, 'loss': 0.004289243556839605, 'time_step': 0.005346895700477692, 'init_value': -1.5712997913360596, 'ave_value': -1.0727286939618286, 'soft_opc': nan} step=2324




2022-04-20 15:53.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.52 [info     ] FQE_20220420155338: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016072571995746657, 'time_algorithm_update': 0.004697061446775873, 'loss': 0.0043828634850972565, 'time_step': 0.004930204655750689, 'init_value': -1.6537115573883057, 'ave_value': -1.13526935206206, 'soft_opc': nan} step=2490




2022-04-20 15:53.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.53 [info     ] FQE_20220420155338: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015782017305672886, 'time_algorithm_update': 0.004710896905646266, 'loss': 0.0043979494878741724, 'time_step': 0.004944674939994353, 'init_value': -1.7195154428482056, 'ave_value': -1.1680158250235222, 'soft_opc': nan} step=2656




2022-04-20 15:53.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.54 [info     ] FQE_20220420155338: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016489804509174392, 'time_algorithm_update': 0.00506906337048634, 'loss': 0.0046426156333771096, 'time_step': 0.0053135357707379816, 'init_value': -1.806386947631836, 'ave_value': -1.2367299849050002, 'soft_opc': nan} step=2822




2022-04-20 15:53.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.55 [info     ] FQE_20220420155338: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001623917774981763, 'time_algorithm_update': 0.005032766296202878, 'loss': 0.004645126479038273, 'time_step': 0.005266508424138448, 'init_value': -1.8904621601104736, 'ave_value': -1.3037866140069725, 'soft_opc': nan} step=2988




2022-04-20 15:53.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.56 [info     ] FQE_20220420155338: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001630237303584455, 'time_algorithm_update': 0.005002533096864999, 'loss': 0.004741328764224358, 'time_step': 0.005240321159362793, 'init_value': -1.9338126182556152, 'ave_value': -1.3158568378638578, 'soft_opc': nan} step=3154




2022-04-20 15:53.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.57 [info     ] FQE_20220420155338: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016079178775649472, 'time_algorithm_update': 0.0050131355423525155, 'loss': 0.0049367746470092115, 'time_step': 0.005246571747653456, 'init_value': -2.0564281940460205, 'ave_value': -1.4136359944402634, 'soft_opc': nan} step=3320




2022-04-20 15:53.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.58 [info     ] FQE_20220420155338: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016082194914300758, 'time_algorithm_update': 0.005031496645456337, 'loss': 0.00494207747151559, 'time_step': 0.005268523492008807, 'init_value': -2.120030641555786, 'ave_value': -1.4380442771288726, 'soft_opc': nan} step=3486




2022-04-20 15:53.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:53.59 [info     ] FQE_20220420155338: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001642861998224833, 'time_algorithm_update': 0.005029284810445395, 'loss': 0.005427485823852897, 'time_step': 0.005267164793359228, 'init_value': -2.2507357597351074, 'ave_value': -1.5298337603004666, 'soft_opc': nan} step=3652




2022-04-20 15:53.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.00 [info     ] FQE_20220420155338: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016540073486695807, 'time_algorithm_update': 0.0050585471003888605, 'loss': 0.0056557831668620365, 'time_step': 0.005299652915403068, 'init_value': -2.3710973262786865, 'ave_value': -1.592268199688411, 'soft_opc': nan} step=3818




2022-04-20 15:54.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.00 [info     ] FQE_20220420155338: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016801041292856974, 'time_algorithm_update': 0.004995641938175063, 'loss': 0.006032372628133856, 'time_step': 0.005240071250731687, 'init_value': -2.432107448577881, 'ave_value': -1.6287225025194185, 'soft_opc': nan} step=3984




2022-04-20 15:54.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.01 [info     ] FQE_20220420155338: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.000161116381725633, 'time_algorithm_update': 0.004122850406600769, 'loss': 0.006493098772248724, 'time_step': 0.004357211561088103, 'init_value': -2.51425838470459, 'ave_value': -1.6617167517744207, 'soft_opc': nan} step=4150




2022-04-20 15:54.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.02 [info     ] FQE_20220420155338: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017163408807961336, 'time_algorithm_update': 0.005043431936976421, 'loss': 0.0071214415330765205, 'time_step': 0.005289714020418833, 'init_value': -2.678398370742798, 'ave_value': -1.771092202427151, 'soft_opc': nan} step=4316




2022-04-20 15:54.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.03 [info     ] FQE_20220420155338: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016792280128203243, 'time_algorithm_update': 0.005185150238404791, 'loss': 0.007728008244113705, 'time_step': 0.005425756236156785, 'init_value': -2.7882156372070312, 'ave_value': -1.833780508320611, 'soft_opc': nan} step=4482




2022-04-20 15:54.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.04 [info     ] FQE_20220420155338: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016509768474532897, 'time_algorithm_update': 0.005104500127125935, 'loss': 0.008181395648041717, 'time_step': 0.005343312240508665, 'init_value': -2.8999452590942383, 'ave_value': -1.8768260312509966, 'soft_opc': nan} step=4648




2022-04-20 15:54.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.05 [info     ] FQE_20220420155338: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016482623226671335, 'time_algorithm_update': 0.0050579912691231235, 'loss': 0.008631948350103998, 'time_step': 0.005298804087811206, 'init_value': -2.9892587661743164, 'ave_value': -1.905912383162492, 'soft_opc': nan} step=4814




2022-04-20 15:54.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.06 [info     ] FQE_20220420155338: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016760969736489905, 'time_algorithm_update': 0.005059222140944147, 'loss': 0.009419582630591804, 'time_step': 0.005302011248577072, 'init_value': -3.0656778812408447, 'ave_value': -1.9422727085717089, 'soft_opc': nan} step=4980




2022-04-20 15:54.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.07 [info     ] FQE_20220420155338: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016243486519319466, 'time_algorithm_update': 0.005011812750115452, 'loss': 0.010220549466279167, 'time_step': 0.00524895306093147, 'init_value': -3.1740527153015137, 'ave_value': -2.006595504055689, 'soft_opc': nan} step=5146




2022-04-20 15:54.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.08 [info     ] FQE_20220420155338: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016258998089526073, 'time_algorithm_update': 0.0050432883113263605, 'loss': 0.010658645237997982, 'time_step': 0.005275418959468244, 'init_value': -3.3129563331604004, 'ave_value': -2.0704668131967385, 'soft_opc': nan} step=5312




2022-04-20 15:54.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.09 [info     ] FQE_20220420155338: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016600970762321748, 'time_algorithm_update': 0.005061699683407703, 'loss': 0.011093787366457015, 'time_step': 0.005301636385630412, 'init_value': -3.409100294113159, 'ave_value': -2.121541330755294, 'soft_opc': nan} step=5478




2022-04-20 15:54.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.10 [info     ] FQE_20220420155338: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016135049153523273, 'time_algorithm_update': 0.004084352987358369, 'loss': 0.011551709911584046, 'time_step': 0.004317708762295275, 'init_value': -3.5190608501434326, 'ave_value': -2.1650524088458436, 'soft_opc': nan} step=5644




2022-04-20 15:54.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.11 [info     ] FQE_20220420155338: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001657066575015884, 'time_algorithm_update': 0.005081601889736681, 'loss': 0.012517602176613074, 'time_step': 0.005322822605270937, 'init_value': -3.6242542266845703, 'ave_value': -2.237501430732978, 'soft_opc': nan} step=5810




2022-04-20 15:54.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.12 [info     ] FQE_20220420155338: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016616625958178416, 'time_algorithm_update': 0.005128178251795022, 'loss': 0.0129339855082939, 'time_step': 0.005367131118314812, 'init_value': -3.727792263031006, 'ave_value': -2.295717220941374, 'soft_opc': nan} step=5976




2022-04-20 15:54.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.13 [info     ] FQE_20220420155338: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016653681375894202, 'time_algorithm_update': 0.005135908184281315, 'loss': 0.01351424295760983, 'time_step': 0.005376038781131606, 'init_value': -3.8501157760620117, 'ave_value': -2.372373712814606, 'soft_opc': nan} step=6142




2022-04-20 15:54.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.14 [info     ] FQE_20220420155338: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016630126769284168, 'time_algorithm_update': 0.0051737736506634446, 'loss': 0.01413483483960897, 'time_step': 0.0054164392402373165, 'init_value': -3.916940689086914, 'ave_value': -2.3583808066810037, 'soft_opc': nan} step=6308




2022-04-20 15:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.15 [info     ] FQE_20220420155338: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016587182699915874, 'time_algorithm_update': 0.005194230252001659, 'loss': 0.014665617804598421, 'time_step': 0.0054335766528026165, 'init_value': -3.9580609798431396, 'ave_value': -2.3602010379335634, 'soft_opc': nan} step=6474




2022-04-20 15:54.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.16 [info     ] FQE_20220420155338: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016401187483086643, 'time_algorithm_update': 0.005049360803810947, 'loss': 0.014938503618812165, 'time_step': 0.0052893851176801935, 'init_value': -4.104551315307617, 'ave_value': -2.4861516449801826, 'soft_opc': nan} step=6640




2022-04-20 15:54.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.17 [info     ] FQE_20220420155338: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016042410609233812, 'time_algorithm_update': 0.005071190466363746, 'loss': 0.01591470330415002, 'time_step': 0.0053062726216143875, 'init_value': -4.180325031280518, 'ave_value': -2.5122177584583425, 'soft_opc': nan} step=6806




2022-04-20 15:54.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.18 [info     ] FQE_20220420155338: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016416268176343068, 'time_algorithm_update': 0.0050103937286928475, 'loss': 0.015326255043205547, 'time_step': 0.0052496841154902815, 'init_value': -4.2259955406188965, 'ave_value': -2.515217471679857, 'soft_opc': nan} step=6972




2022-04-20 15:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.18 [info     ] FQE_20220420155338: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015737636979803982, 'time_algorithm_update': 0.004090613629444536, 'loss': 0.01693330189081313, 'time_step': 0.004319143582539386, 'init_value': -4.361726760864258, 'ave_value': -2.6339608797014833, 'soft_opc': nan} step=7138




2022-04-20 15:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.19 [info     ] FQE_20220420155338: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016538924481495317, 'time_algorithm_update': 0.005009595170078507, 'loss': 0.01720134083585567, 'time_step': 0.005245169961308858, 'init_value': -4.420737266540527, 'ave_value': -2.651446955020095, 'soft_opc': nan} step=7304




2022-04-20 15:54.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.20 [info     ] FQE_20220420155338: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016847145126526616, 'time_algorithm_update': 0.005041942538985287, 'loss': 0.01706202681378338, 'time_step': 0.005285172577363899, 'init_value': -4.498048305511475, 'ave_value': -2.663519069421533, 'soft_opc': nan} step=7470




2022-04-20 15:54.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.21 [info     ] FQE_20220420155338: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016940501799066383, 'time_algorithm_update': 0.00506619947502412, 'loss': 0.017345591718246824, 'time_step': 0.0053098776254309225, 'init_value': -4.59438419342041, 'ave_value': -2.705007313000592, 'soft_opc': nan} step=7636




2022-04-20 15:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.22 [info     ] FQE_20220420155338: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016622227358530802, 'time_algorithm_update': 0.005057355007493353, 'loss': 0.018290881918889123, 'time_step': 0.005298580031797111, 'init_value': -4.648017883300781, 'ave_value': -2.724661802107821, 'soft_opc': nan} step=7802




2022-04-20 15:54.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.23 [info     ] FQE_20220420155338: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016460792127862033, 'time_algorithm_update': 0.005047668893653226, 'loss': 0.019017805825744992, 'time_step': 0.005288968603295016, 'init_value': -4.690141677856445, 'ave_value': -2.7225914325078464, 'soft_opc': nan} step=7968




2022-04-20 15:54.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.24 [info     ] FQE_20220420155338: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.000173093324684235, 'time_algorithm_update': 0.005079349839543721, 'loss': 0.01933369195826509, 'time_step': 0.005325926355568759, 'init_value': -4.853743553161621, 'ave_value': -2.8380110758449044, 'soft_opc': nan} step=8134




2022-04-20 15:54.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:54.25 [info     ] FQE_20220420155338: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001659623111586973, 'time_algorithm_update': 0.005043374486716397, 'loss': 0.020052242512865193, 'time_step': 0.005286736660693066, 'init_value': -4.889900207519531, 'ave_value': -2.835196281870542, 'soft_opc': nan} step=8300




2022-04-20 15:54.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155338/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:54.25 [info     ] Directory is created at d3rlpy_logs/FQE_20220420155425
2022-04-20 15:54.25 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:54.25 [debug    ] Building models...
2022-04-20 15:54.25 [debug    ] Models have been built.
2022-04-20 15:54.25 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420155425/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:54.27 [info     ] FQE_20220420155425: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016588942949161974, 'time_algorithm_update': 0.004609681839166686, 'loss': 0.030638986272564114, 'time_step': 0.004848997260248939, 'init_value': -1.2945137023925781, 'ave_value': -1.259153100049442, 'soft_opc': nan} step=344




2022-04-20 15:54.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.30 [info     ] FQE_20220420155425: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017151790995930516, 'time_algorithm_update': 0.005168862814127013, 'loss': 0.026598599939715376, 'time_step': 0.005413976519606834, 'init_value': -2.007669448852539, 'ave_value': -1.9726414940378687, 'soft_opc': nan} step=688




2022-04-20 15:54.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.31 [info     ] FQE_20220420155425: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016718132551326307, 'time_algorithm_update': 0.005041940267695937, 'loss': 0.03153293849467192, 'time_step': 0.005283374425976775, 'init_value': -2.945615530014038, 'ave_value': -2.926005391494648, 'soft_opc': nan} step=1032




2022-04-20 15:54.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.33 [info     ] FQE_20220420155425: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017011650772981866, 'time_algorithm_update': 0.0050745751968649935, 'loss': 0.03708003998225078, 'time_step': 0.0053166407485340916, 'init_value': -3.5720880031585693, 'ave_value': -3.566011755195287, 'soft_opc': nan} step=1376




2022-04-20 15:54.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.35 [info     ] FQE_20220420155425: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016938184582909873, 'time_algorithm_update': 0.005071177039035531, 'loss': 0.04661815179793467, 'time_step': 0.005315975394359854, 'init_value': -4.27822732925415, 'ave_value': -4.2770114441578455, 'soft_opc': nan} step=1720




2022-04-20 15:54.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.37 [info     ] FQE_20220420155425: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016897362332011378, 'time_algorithm_update': 0.004663615725761236, 'loss': 0.055033737314366844, 'time_step': 0.004909800928692485, 'init_value': -4.84291410446167, 'ave_value': -4.857809174899851, 'soft_opc': nan} step=2064




2022-04-20 15:54.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.39 [info     ] FQE_20220420155425: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001687047093413597, 'time_algorithm_update': 0.005066109951152358, 'loss': 0.07315227732625465, 'time_step': 0.005308908085490382, 'init_value': -5.555355072021484, 'ave_value': -5.582764357543207, 'soft_opc': nan} step=2408




2022-04-20 15:54.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.41 [info     ] FQE_20220420155425: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016834153685458872, 'time_algorithm_update': 0.00507320013157157, 'loss': 0.09017517317497972, 'time_step': 0.005317889673765315, 'init_value': -6.115337371826172, 'ave_value': -6.173297948041209, 'soft_opc': nan} step=2752




2022-04-20 15:54.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.43 [info     ] FQE_20220420155425: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017145553300547045, 'time_algorithm_update': 0.005119828290717546, 'loss': 0.10666983058611147, 'time_step': 0.005363154549931371, 'init_value': -6.528670310974121, 'ave_value': -6.581053925367693, 'soft_opc': nan} step=3096




2022-04-20 15:54.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.45 [info     ] FQE_20220420155425: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001683325268501459, 'time_algorithm_update': 0.0046161641908246415, 'loss': 0.12826109459883597, 'time_step': 0.004857330128203991, 'init_value': -7.302175998687744, 'ave_value': -7.285798400237754, 'soft_opc': nan} step=3440




2022-04-20 15:54.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.47 [info     ] FQE_20220420155425: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016826113989186841, 'time_algorithm_update': 0.004987214193787686, 'loss': 0.1581005598689148, 'time_step': 0.005231468483459118, 'init_value': -7.981639385223389, 'ave_value': -7.917772227153182, 'soft_opc': nan} step=3784




2022-04-20 15:54.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.49 [info     ] FQE_20220420155425: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016860975775607798, 'time_algorithm_update': 0.00506228901619135, 'loss': 0.1803254947326211, 'time_step': 0.005304825860400533, 'init_value': -8.552826881408691, 'ave_value': -8.378210808439096, 'soft_opc': nan} step=4128




2022-04-20 15:54.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.51 [info     ] FQE_20220420155425: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016945392586464105, 'time_algorithm_update': 0.005135769067808639, 'loss': 0.2099329602558079, 'time_step': 0.005381548127462697, 'init_value': -8.884269714355469, 'ave_value': -8.743339718671658, 'soft_opc': nan} step=4472




2022-04-20 15:54.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.53 [info     ] FQE_20220420155425: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001680331174717393, 'time_algorithm_update': 0.00504123332888581, 'loss': 0.23885995397062668, 'time_step': 0.005286913278490998, 'init_value': -9.688291549682617, 'ave_value': -9.345431361701927, 'soft_opc': nan} step=4816




2022-04-20 15:54.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.55 [info     ] FQE_20220420155425: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.000166014876476554, 'time_algorithm_update': 0.004585494828778644, 'loss': 0.27044030624376825, 'time_step': 0.004828081574550894, 'init_value': -10.15416145324707, 'ave_value': -9.77878808290404, 'soft_opc': nan} step=5160




2022-04-20 15:54.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.57 [info     ] FQE_20220420155425: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001705891864244328, 'time_algorithm_update': 0.005098478738651719, 'loss': 0.29926521249310395, 'time_step': 0.005345985639926999, 'init_value': -10.948198318481445, 'ave_value': -10.45322885993629, 'soft_opc': nan} step=5504




2022-04-20 15:54.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:54.59 [info     ] FQE_20220420155425: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017011650772981866, 'time_algorithm_update': 0.005024979973948279, 'loss': 0.33255733081767724, 'time_step': 0.005268658316412637, 'init_value': -11.543338775634766, 'ave_value': -10.992024152205737, 'soft_opc': nan} step=5848




2022-04-20 15:54.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.01 [info     ] FQE_20220420155425: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016595804414083792, 'time_algorithm_update': 0.004983274049537126, 'loss': 0.3660739704771617, 'time_step': 0.005225679902143256, 'init_value': -11.673181533813477, 'ave_value': -10.998643141628142, 'soft_opc': nan} step=6192




2022-04-20 15:55.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.02 [info     ] FQE_20220420155425: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016417267710663552, 'time_algorithm_update': 0.004568576119666876, 'loss': 0.40439578131664283, 'time_step': 0.00480731351431026, 'init_value': -12.333077430725098, 'ave_value': -11.581956818511895, 'soft_opc': nan} step=6536




2022-04-20 15:55.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.04 [info     ] FQE_20220420155425: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016987531684165778, 'time_algorithm_update': 0.00506622361582379, 'loss': 0.4267891717771458, 'time_step': 0.005311504352924435, 'init_value': -12.790658950805664, 'ave_value': -12.00694827189069, 'soft_opc': nan} step=6880




2022-04-20 15:55.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.06 [info     ] FQE_20220420155425: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001650882321734761, 'time_algorithm_update': 0.0050608903862709225, 'loss': 0.4590345369760207, 'time_step': 0.005302395931510038, 'init_value': -13.387434005737305, 'ave_value': -12.582292639977206, 'soft_opc': nan} step=7224




2022-04-20 15:55.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.08 [info     ] FQE_20220420155425: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001682770806689595, 'time_algorithm_update': 0.005104245141495106, 'loss': 0.4926474611924658, 'time_step': 0.005351650160412455, 'init_value': -14.17281723022461, 'ave_value': -13.432702646384367, 'soft_opc': nan} step=7568




2022-04-20 15:55.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.10 [info     ] FQE_20220420155425: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001706654249235641, 'time_algorithm_update': 0.005083223414975543, 'loss': 0.5189762577674416, 'time_step': 0.005330902892489766, 'init_value': -14.614105224609375, 'ave_value': -13.978681443337988, 'soft_opc': nan} step=7912




2022-04-20 15:55.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.12 [info     ] FQE_20220420155425: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016533981922060945, 'time_algorithm_update': 0.004589147345964299, 'loss': 0.5387339517365881, 'time_step': 0.004830308431802794, 'init_value': -14.700286865234375, 'ave_value': -14.148048820472658, 'soft_opc': nan} step=8256




2022-04-20 15:55.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.14 [info     ] FQE_20220420155425: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016919818035391875, 'time_algorithm_update': 0.0050565842972245326, 'loss': 0.5493246570695192, 'time_step': 0.005304037831550421, 'init_value': -15.058749198913574, 'ave_value': -14.657722888207525, 'soft_opc': nan} step=8600




2022-04-20 15:55.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.16 [info     ] FQE_20220420155425: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001717833585517351, 'time_algorithm_update': 0.005074396382930667, 'loss': 0.5654705645310757, 'time_step': 0.005322082098140273, 'init_value': -15.53085708618164, 'ave_value': -15.350208589014061, 'soft_opc': nan} step=8944




2022-04-20 15:55.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.18 [info     ] FQE_20220420155425: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016940541045610294, 'time_algorithm_update': 0.00508164597112079, 'loss': 0.5749549649406744, 'time_step': 0.0053278533525245135, 'init_value': -15.782881736755371, 'ave_value': -15.884959485277173, 'soft_opc': nan} step=9288




2022-04-20 15:55.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.20 [info     ] FQE_20220420155425: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016615418500678483, 'time_algorithm_update': 0.004772806583448898, 'loss': 0.5973957961446844, 'time_step': 0.005014904016672179, 'init_value': -16.101316452026367, 'ave_value': -16.33358719288623, 'soft_opc': nan} step=9632




2022-04-20 15:55.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.22 [info     ] FQE_20220420155425: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016928966655287633, 'time_algorithm_update': 0.005082221918327864, 'loss': 0.6128820018940281, 'time_step': 0.005327420179233994, 'init_value': -16.195947647094727, 'ave_value': -16.565513477239524, 'soft_opc': nan} step=9976




2022-04-20 15:55.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.24 [info     ] FQE_20220420155425: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016860144082890002, 'time_algorithm_update': 0.0050250839355380035, 'loss': 0.6279397394324024, 'time_step': 0.005270554575809213, 'init_value': -16.207918167114258, 'ave_value': -16.72391378111856, 'soft_opc': nan} step=10320




2022-04-20 15:55.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.26 [info     ] FQE_20220420155425: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016519635222678961, 'time_algorithm_update': 0.005048197369242824, 'loss': 0.6318963267190685, 'time_step': 0.005289459644361984, 'init_value': -16.201488494873047, 'ave_value': -17.080914959674367, 'soft_opc': nan} step=10664




2022-04-20 15:55.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.28 [info     ] FQE_20220420155425: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016736637714297273, 'time_algorithm_update': 0.005062561395556428, 'loss': 0.6342408271062426, 'time_step': 0.0053069508352945015, 'init_value': -16.13018798828125, 'ave_value': -17.164477175271333, 'soft_opc': nan} step=11008




2022-04-20 15:55.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.30 [info     ] FQE_20220420155425: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001663212166276089, 'time_algorithm_update': 0.004578092070512994, 'loss': 0.6223343609843056, 'time_step': 0.0048212360504061675, 'init_value': -16.1060733795166, 'ave_value': -17.365563781145408, 'soft_opc': nan} step=11352




2022-04-20 15:55.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.32 [info     ] FQE_20220420155425: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017150127610494924, 'time_algorithm_update': 0.005023909169574117, 'loss': 0.6286495417020782, 'time_step': 0.005272201327390449, 'init_value': -16.174684524536133, 'ave_value': -17.624817404945524, 'soft_opc': nan} step=11696




2022-04-20 15:55.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.34 [info     ] FQE_20220420155425: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016664626986481423, 'time_algorithm_update': 0.0050494705521783164, 'loss': 0.6256021710740792, 'time_step': 0.005293026913044064, 'init_value': -16.321199417114258, 'ave_value': -17.9938847800658, 'soft_opc': nan} step=12040




2022-04-20 15:55.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.36 [info     ] FQE_20220420155425: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001657022986301156, 'time_algorithm_update': 0.005038264878960543, 'loss': 0.6161255769020076, 'time_step': 0.005278477142023486, 'init_value': -16.67815399169922, 'ave_value': -18.52769601977858, 'soft_opc': nan} step=12384




2022-04-20 15:55.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.37 [info     ] FQE_20220420155425: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016761311264925225, 'time_algorithm_update': 0.0049584189126657885, 'loss': 0.6146498967319467, 'time_step': 0.005204116189202597, 'init_value': -16.80193328857422, 'ave_value': -18.784054710986595, 'soft_opc': nan} step=12728




2022-04-20 15:55.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.39 [info     ] FQE_20220420155425: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016813292059787485, 'time_algorithm_update': 0.004796347645826118, 'loss': 0.567081417482303, 'time_step': 0.005039228256358657, 'init_value': -16.64813804626465, 'ave_value': -18.7484291190894, 'soft_opc': nan} step=13072




2022-04-20 15:55.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.41 [info     ] FQE_20220420155425: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001696438290352045, 'time_algorithm_update': 0.004993016636648843, 'loss': 0.5916071071148699, 'time_step': 0.005239645409029584, 'init_value': -16.95767593383789, 'ave_value': -19.067294190407452, 'soft_opc': nan} step=13416




2022-04-20 15:55.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.43 [info     ] FQE_20220420155425: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016909907030504803, 'time_algorithm_update': 0.005058179761088172, 'loss': 0.5778219296954312, 'time_step': 0.0053052867567816445, 'init_value': -16.82525634765625, 'ave_value': -18.891002211475595, 'soft_opc': nan} step=13760




2022-04-20 15:55.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.45 [info     ] FQE_20220420155425: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001676096472629281, 'time_algorithm_update': 0.005058534616647765, 'loss': 0.5558974797360946, 'time_step': 0.005300869775372882, 'init_value': -16.913110733032227, 'ave_value': -19.010239059529226, 'soft_opc': nan} step=14104




2022-04-20 15:55.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.47 [info     ] FQE_20220420155425: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016683825226717218, 'time_algorithm_update': 0.0046437916367553, 'loss': 0.5480456949688147, 'time_step': 0.004888286424237628, 'init_value': -16.814197540283203, 'ave_value': -18.974216572839456, 'soft_opc': nan} step=14448




2022-04-20 15:55.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.49 [info     ] FQE_20220420155425: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016810173212095748, 'time_algorithm_update': 0.0050677684850470965, 'loss': 0.5380634599701003, 'time_step': 0.005314154680385146, 'init_value': -16.754409790039062, 'ave_value': -18.916270045146163, 'soft_opc': nan} step=14792




2022-04-20 15:55.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.51 [info     ] FQE_20220420155425: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.000172595645106116, 'time_algorithm_update': 0.005012408245441525, 'loss': 0.524412734184934, 'time_step': 0.0052640576695286954, 'init_value': -17.136444091796875, 'ave_value': -19.183010675009708, 'soft_opc': nan} step=15136




2022-04-20 15:55.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.53 [info     ] FQE_20220420155425: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016618468040643738, 'time_algorithm_update': 0.005064475674961888, 'loss': 0.5274118316511429, 'time_step': 0.005308504021444986, 'init_value': -17.571216583251953, 'ave_value': -19.492374881865405, 'soft_opc': nan} step=15480




2022-04-20 15:55.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.55 [info     ] FQE_20220420155425: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017217633336089378, 'time_algorithm_update': 0.0051701983740163404, 'loss': 0.5168617812315609, 'time_step': 0.0054190047951631765, 'init_value': -17.20951271057129, 'ave_value': -19.136580611887673, 'soft_opc': nan} step=15824




2022-04-20 15:55.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.57 [info     ] FQE_20220420155425: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016695746155672296, 'time_algorithm_update': 0.004736571117889049, 'loss': 0.5150778346727494, 'time_step': 0.004979152319043181, 'init_value': -17.228946685791016, 'ave_value': -19.055867052354916, 'soft_opc': nan} step=16168




2022-04-20 15:55.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:55.59 [info     ] FQE_20220420155425: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017185405243274777, 'time_algorithm_update': 0.0050772698812706525, 'loss': 0.5083760082028633, 'time_step': 0.00532421538996142, 'init_value': -17.405303955078125, 'ave_value': -19.06219971930558, 'soft_opc': nan} step=16512




2022-04-20 15:55.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:56.01 [info     ] FQE_20220420155425: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017287564832110737, 'time_algorithm_update': 0.005089871412099794, 'loss': 0.5232214711953042, 'time_step': 0.005339344573575397, 'init_value': -17.66330337524414, 'ave_value': -19.362677813067336, 'soft_opc': nan} step=16856




2022-04-20 15:56.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:56.03 [info     ] FQE_20220420155425: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016639329666315124, 'time_algorithm_update': 0.0050357905931250994, 'loss': 0.5172520738723146, 'time_step': 0.005276865737382756, 'init_value': -17.497364044189453, 'ave_value': -19.130507031594917, 'soft_opc': nan} step=17200




2022-04-20 15:56.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155425/model_17200.pt
search iteration:  6
using hyper params:  [0.009309517446710828, 0.007281179591166241, 3.635309530650009e-05, 7]
2022-04-20 15:56.03 [debug    ] RoundIterator is selected.
2022-04-20 15:56.03 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420155603
2022-04-20 15:56.03 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 15:56.03 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:56.03 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:56.03 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009309517446710

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.07 [info     ] TD3PlusBC_20220420155603: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00040068542748166803, 'time_algorithm_update': 0.008463761262726365, 'critic_loss': 9.948767953448826, 'actor_loss': 2.670782688765498, 'time_step': 0.008947417749996073, 'td_error': 1.0370241855680193, 'init_value': -11.252344131469727, 'ave_value': -7.229345930854094} step=342
2022-04-20 15:56.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.10 [info     ] TD3PlusBC_20220420155603: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00040464931064181856, 'time_algorithm_update': 0.009062041316116065, 'critic_loss': 6.323082693139015, 'actor_loss': 2.5802407069512974, 'time_step': 0.009547550775851423, 'td_error': 1.3143069841930266, 'init_value': -15.933691024780273, 'ave_value': -10.296217609208984} step=684
2022-04-20 15:56.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.14 [info     ] TD3PlusBC_20220420155603: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00040278169843885634, 'time_algorithm_update': 0.008706824821338319, 'critic_loss': 10.069039034564593, 'actor_loss': 2.5724086831187645, 'time_step': 0.009191516547175179, 'td_error': 1.6679950782922766, 'init_value': -20.680944442749023, 'ave_value': -13.4104863208779} step=1026
2022-04-20 15:56.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.18 [info     ] TD3PlusBC_20220420155603: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0004063851652089615, 'time_algorithm_update': 0.009025920901382179, 'critic_loss': 14.611411613330507, 'actor_loss': 2.5696607416833355, 'time_step': 0.009509943382084718, 'td_error': 2.164200535313472, 'init_value': -25.808008193969727, 'ave_value': -16.75791994639429} step=1368
2022-04-20 15:56.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.22 [info     ] TD3PlusBC_20220420155603: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00040744480333830183, 'time_algorithm_update': 0.008966570012053551, 'critic_loss': 19.649170030627335, 'actor_loss': 2.56873673584029, 'time_step': 0.009453647317942123, 'td_error': 2.6552662535126963, 'init_value': -30.802204132080078, 'ave_value': -19.99527408067892} step=1710
2022-04-20 15:56.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.25 [info     ] TD3PlusBC_20220420155603: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00040056482393142077, 'time_algorithm_update': 0.00854553534970646, 'critic_loss': 24.317538835848982, 'actor_loss': 2.5670406567422965, 'time_step': 0.009021040291814079, 'td_error': 3.0094714484562526, 'init_value': -34.947147369384766, 'ave_value': -22.825035818779153} step=2052
2022-04-20 15:56.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.29 [info     ] TD3PlusBC_20220420155603: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003982357114379169, 'time_algorithm_update': 0.009032766024271647, 'critic_loss': 29.288610650782, 'actor_loss': 2.565959438245896, 'time_step': 0.009503615529913651, 'td_error': 3.6200762514150786, 'init_value': -39.689491271972656, 'ave_value': -25.85875924794532} step=2394
2022-04-20 15:56.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.33 [info     ] TD3PlusBC_20220420155603: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00039699133376629034, 'time_algorithm_update': 0.008731576434352942, 'critic_loss': 34.77359431528906, 'actor_loss': 2.565103288282428, 'time_step': 0.009203522526032744, 'td_error': 4.077662628056106, 'init_value': -42.90396499633789, 'ave_value': -28.34275672489366} step=2736
2022-04-20 15:56.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.36 [info     ] TD3PlusBC_20220420155603: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0004051693698816132, 'time_algorithm_update': 0.008924434756674962, 'critic_loss': 40.31163854208606, 'actor_loss': 2.5648006165933888, 'time_step': 0.009407523082710846, 'td_error': 4.6947692493794815, 'init_value': -47.625587463378906, 'ave_value': -31.27027217350296} step=3078
2022-04-20 15:56.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.40 [info     ] TD3PlusBC_20220420155603: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00040500833277116744, 'time_algorithm_update': 0.008982821514731959, 'critic_loss': 46.17458236984342, 'actor_loss': 2.564830619689317, 'time_step': 0.009465914023549933, 'td_error': 5.494002799491712, 'init_value': -52.289520263671875, 'ave_value': -34.07651769786519} step=3420
2022-04-20 15:56.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.44 [info     ] TD3PlusBC_20220420155603: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0004039103524726734, 'time_algorithm_update': 0.008662271917911997, 'critic_loss': 52.24315090625607, 'actor_loss': 2.5640551263128804, 'time_step': 0.009142240585639463, 'td_error': 5.814963215046249, 'init_value': -55.402244567871094, 'ave_value': -36.1480676042766} step=3762
2022-04-20 15:56.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.48 [info     ] TD3PlusBC_20220420155603: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00040605890820598047, 'time_algorithm_update': 0.009036878396195975, 'critic_loss': 57.825500421356736, 'actor_loss': 2.5634005446183052, 'time_step': 0.009522250521252726, 'td_error': 6.336970681482257, 'init_value': -58.139564514160156, 'ave_value': -38.32029948997714} step=4104
2022-04-20 15:56.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.51 [info     ] TD3PlusBC_20220420155603: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0004010019246597736, 'time_algorithm_update': 0.008544138997618914, 'critic_loss': 64.12923264085201, 'actor_loss': 2.5636763600578085, 'time_step': 0.009021389554118553, 'td_error': 6.966149774627193, 'init_value': -60.8871955871582, 'ave_value': -40.56931341758619} step=4446
2022-04-20 15:56.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.55 [info     ] TD3PlusBC_20220420155603: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004019681473224484, 'time_algorithm_update': 0.009089255890651056, 'critic_loss': 69.42379481890048, 'actor_loss': 2.5627133121267396, 'time_step': 0.00957088163721631, 'td_error': 7.678546857622288, 'init_value': -64.63858795166016, 'ave_value': -42.57951165878247} step=4788
2022-04-20 15:56.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.59 [info     ] TD3PlusBC_20220420155603: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0004037151559751633, 'time_algorithm_update': 0.009015650079961409, 'critic_loss': 74.94918322423746, 'actor_loss': 2.5628161932292737, 'time_step': 0.009495790241754542, 'td_error': 7.904797405286344, 'init_value': -66.33993530273438, 'ave_value': -44.35963970858721} step=5130
2022-04-20 15:56.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.02 [info     ] TD3PlusBC_20220420155603: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003988122382359198, 'time_algorithm_update': 0.00854571520933631, 'critic_loss': 80.50947852999147, 'actor_loss': 2.5631543098137395, 'time_step': 0.009021587539137456, 'td_error': 8.766642261292887, 'init_value': -70.68180847167969, 'ave_value': -46.692388153472365} step=5472
2022-04-20 15:57.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.06 [info     ] TD3PlusBC_20220420155603: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0004049260713900739, 'time_algorithm_update': 0.009104363402427986, 'critic_loss': 86.29196647733275, 'actor_loss': 2.563241720199585, 'time_step': 0.009589442732738472, 'td_error': 8.931628029326994, 'init_value': -70.24287414550781, 'ave_value': -47.8708151638481} step=5814
2022-04-20 15:57.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.10 [info     ] TD3PlusBC_20220420155603: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0004062297051413017, 'time_algorithm_update': 0.008662047441939862, 'critic_loss': 91.2268787629423, 'actor_loss': 2.5637409938009164, 'time_step': 0.00914749694846527, 'td_error': 9.659639555841785, 'init_value': -74.06794738769531, 'ave_value': -49.5054778015288} step=6156
2022-04-20 15:57.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.14 [info     ] TD3PlusBC_20220420155603: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0004080241186576977, 'time_algorithm_update': 0.00902637473323889, 'critic_loss': 96.69469931529976, 'actor_loss': 2.5638673598306223, 'time_step': 0.009508257023772301, 'td_error': 10.31851467364158, 'init_value': -75.1976547241211, 'ave_value': -51.068481530229654} step=6498
2022-04-20 15:57.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.18 [info     ] TD3PlusBC_20220420155603: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0004019946382756819, 'time_algorithm_update': 0.009064636035272253, 'critic_loss': 101.02328941835995, 'actor_loss': 2.5631908464152913, 'time_step': 0.009532237610621759, 'td_error': 10.54029937147342, 'init_value': -77.04036712646484, 'ave_value': -52.12650935969978} step=6840
2022-04-20 15:57.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.21 [info     ] TD3PlusBC_20220420155603: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00040128286819011847, 'time_algorithm_update': 0.008633483223050659, 'critic_loss': 106.1257147203412, 'actor_loss': 2.564290886036834, 'time_step': 0.00909679186971564, 'td_error': 11.315645777380631, 'init_value': -79.38555908203125, 'ave_value': -54.153734125819994} step=7182
2022-04-20 15:57.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.25 [info     ] TD3PlusBC_20220420155603: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0004050933826736539, 'time_algorithm_update': 0.009010596582066942, 'critic_loss': 110.7448493109809, 'actor_loss': 2.5648029380374484, 'time_step': 0.00948864814133672, 'td_error': 11.38657198257028, 'init_value': -78.54634094238281, 'ave_value': -54.68772571154409} step=7524
2022-04-20 15:57.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.29 [info     ] TD3PlusBC_20220420155603: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003987355538976123, 'time_algorithm_update': 0.008506487684640272, 'critic_loss': 115.35997676291662, 'actor_loss': 2.5650498853092305, 'time_step': 0.008970377738015694, 'td_error': 11.236484455503096, 'init_value': -78.89596557617188, 'ave_value': -55.61260990542049} step=7866
2022-04-20 15:57.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.32 [info     ] TD3PlusBC_20220420155603: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00040152058963887174, 'time_algorithm_update': 0.008941768205653854, 'critic_loss': 119.73976679573282, 'actor_loss': 2.5664759002930935, 'time_step': 0.009406330989815338, 'td_error': 11.646928969361072, 'init_value': -78.94145202636719, 'ave_value': -56.72277738403932} step=8208
2022-04-20 15:57.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.36 [info     ] TD3PlusBC_20220420155603: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0004047580629761456, 'time_algorithm_update': 0.008990362373708982, 'critic_loss': 123.89531366448654, 'actor_loss': 2.565075796250014, 'time_step': 0.00946550480803551, 'td_error': 12.618863489359349, 'init_value': -81.52558898925781, 'ave_value': -57.99455192845348} step=8550
2022-04-20 15:57.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.40 [info     ] TD3PlusBC_20220420155603: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00039718862165484515, 'time_algorithm_update': 0.008511782389635231, 'critic_loss': 128.0945325929519, 'actor_loss': 2.5672998247090835, 'time_step': 0.008973130705760933, 'td_error': 13.27816179986744, 'init_value': -82.61457824707031, 'ave_value': -58.89432897850065} step=8892
2022-04-20 15:57.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.43 [info     ] TD3PlusBC_20220420155603: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00040518540387962296, 'time_algorithm_update': 0.009028965966743335, 'critic_loss': 131.89269486365959, 'actor_loss': 2.566627821727106, 'time_step': 0.009500164037559464, 'td_error': 13.515605829900935, 'init_value': -83.35114288330078, 'ave_value': -59.96494904769904} step=9234
2022-04-20 15:57.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.47 [info     ] TD3PlusBC_20220420155603: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0004015505662438465, 'time_algorithm_update': 0.008597016334533691, 'critic_loss': 135.52441122936227, 'actor_loss': 2.567045740216796, 'time_step': 0.009061854485182734, 'td_error': 13.136920875543115, 'init_value': -83.56777954101562, 'ave_value': -60.56435815241943} step=9576
2022-04-20 15:57.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.51 [info     ] TD3PlusBC_20220420155603: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00040234041492841395, 'time_algorithm_update': 0.008901345799540916, 'critic_loss': 138.8072412501999, 'actor_loss': 2.567305619256538, 'time_step': 0.009370329784371002, 'td_error': 14.440471732683095, 'init_value': -86.58818054199219, 'ave_value': -61.89253896015} step=9918
2022-04-20 15:57.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.55 [info     ] TD3PlusBC_20220420155603: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0004037284014517801, 'time_algorithm_update': 0.00900720713431375, 'critic_loss': 141.89504516333864, 'actor_loss': 2.566902032372547, 'time_step': 0.009475197011267233, 'td_error': 13.827150645809857, 'init_value': -85.6363754272461, 'ave_value': -62.37304565779881} step=10260
2022-04-20 15:57.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.58 [info     ] TD3PlusBC_20220420155603: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003987857472826863, 'time_algorithm_update': 0.00862736311572337, 'critic_loss': 144.49505947626125, 'actor_loss': 2.5662508164232936, 'time_step': 0.009088100745664006, 'td_error': 14.06937728561008, 'init_value': -86.5076904296875, 'ave_value': -62.849090490600275} step=10602
2022-04-20 15:57.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.02 [info     ] TD3PlusBC_20220420155603: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0004025237602100038, 'time_algorithm_update': 0.009041493399101392, 'critic_loss': 146.84820882339923, 'actor_loss': 2.5669599750585723, 'time_step': 0.009511561421623007, 'td_error': 14.462684763998922, 'init_value': -85.0760726928711, 'ave_value': -63.36256594995322} step=10944
2022-04-20 15:58.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.06 [info     ] TD3PlusBC_20220420155603: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00039603347666779454, 'time_algorithm_update': 0.008525564656620138, 'critic_loss': 150.0462399756002, 'actor_loss': 2.5668632761079664, 'time_step': 0.008985090674015513, 'td_error': 14.664944265771348, 'init_value': -87.13964080810547, 'ave_value': -64.1197150252137} step=11286
2022-04-20 15:58.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.09 [info     ] TD3PlusBC_20220420155603: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003669624440154137, 'time_algorithm_update': 0.008575355100352861, 'critic_loss': 151.88902398717335, 'actor_loss': 2.5663782839189495, 'time_step': 0.00900187269288894, 'td_error': 14.692814041536678, 'init_value': -87.35673522949219, 'ave_value': -64.86461680318573} step=11628
2022-04-20 15:58.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.13 [info     ] TD3PlusBC_20220420155603: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003741345210382116, 'time_algorithm_update': 0.008711018757513391, 'critic_loss': 153.8989272535893, 'actor_loss': 2.567316396891722, 'time_step': 0.00914380355188024, 'td_error': 15.47341746569457, 'init_value': -86.80828857421875, 'ave_value': -65.66822697192079} step=11970
2022-04-20 15:58.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.17 [info     ] TD3PlusBC_20220420155603: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003970408300210161, 'time_algorithm_update': 0.008654843296921044, 'critic_loss': 155.69328325952006, 'actor_loss': 2.567438914761906, 'time_step': 0.009119017082348205, 'td_error': 15.030830492010576, 'init_value': -88.68742370605469, 'ave_value': -66.12556202195786} step=12312
2022-04-20 15:58.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.20 [info     ] TD3PlusBC_20220420155603: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00040149549294633477, 'time_algorithm_update': 0.009069327025385628, 'critic_loss': 157.91715711180927, 'actor_loss': 2.566510639692608, 'time_step': 0.009533940002932185, 'td_error': 15.355602900795521, 'init_value': -87.3204574584961, 'ave_value': -66.27748692388639} step=12654
2022-04-20 15:58.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.24 [info     ] TD3PlusBC_20220420155603: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003999541377463536, 'time_algorithm_update': 0.008533773366470782, 'critic_loss': 160.0198365373221, 'actor_loss': 2.567172652796695, 'time_step': 0.008998074029621324, 'td_error': 15.721408924658611, 'init_value': -88.96097564697266, 'ave_value': -66.6111489973021} step=12996
2022-04-20 15:58.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.28 [info     ] TD3PlusBC_20220420155603: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00040274405340005084, 'time_algorithm_update': 0.008959247354875532, 'critic_loss': 161.59351442571273, 'actor_loss': 2.566949905707822, 'time_step': 0.009428184631972284, 'td_error': 15.671431896198262, 'init_value': -87.74019622802734, 'ave_value': -67.31029049933846} step=13338
2022-04-20 15:58.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.31 [info     ] TD3PlusBC_20220420155603: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003994640551115337, 'time_algorithm_update': 0.009019707378588225, 'critic_loss': 162.90969159310325, 'actor_loss': 2.5670034467128287, 'time_step': 0.009480544001038312, 'td_error': 15.634902665789868, 'init_value': -86.195556640625, 'ave_value': -67.27653636586375} step=13680
2022-04-20 15:58.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.35 [info     ] TD3PlusBC_20220420155603: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003966023350319667, 'time_algorithm_update': 0.008699187758373238, 'critic_loss': 164.6363138790019, 'actor_loss': 2.5670592854594627, 'time_step': 0.009163118942439208, 'td_error': 16.84658815249674, 'init_value': -91.09519958496094, 'ave_value': -68.73175616554681} step=14022
2022-04-20 15:58.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.39 [info     ] TD3PlusBC_20220420155603: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003971133315772341, 'time_algorithm_update': 0.008904719910426446, 'critic_loss': 165.4231837534765, 'actor_loss': 2.5676281800744127, 'time_step': 0.009365297897517333, 'td_error': 15.549413811489192, 'init_value': -86.34446716308594, 'ave_value': -68.12161269287017} step=14364
2022-04-20 15:58.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.42 [info     ] TD3PlusBC_20220420155603: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00039896978969462436, 'time_algorithm_update': 0.008617593531022993, 'critic_loss': 166.68237123991312, 'actor_loss': 2.5671432227419135, 'time_step': 0.009076899255228322, 'td_error': 16.42946636654527, 'init_value': -89.94549560546875, 'ave_value': -68.94918956290175} step=14706
2022-04-20 15:58.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.46 [info     ] TD3PlusBC_20220420155603: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00040606448524876644, 'time_algorithm_update': 0.008952560480575116, 'critic_loss': 167.37182556955437, 'actor_loss': 2.5675374513481097, 'time_step': 0.009425080310531526, 'td_error': 16.531944734282437, 'init_value': -90.07906341552734, 'ave_value': -69.50768405356973} step=15048
2022-04-20 15:58.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.50 [info     ] TD3PlusBC_20220420155603: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00041392044714319776, 'time_algorithm_update': 0.009022662514134458, 'critic_loss': 167.64059952406856, 'actor_loss': 2.5675642281247857, 'time_step': 0.009501354039063927, 'td_error': 16.728920265356795, 'init_value': -89.36138916015625, 'ave_value': -69.51903930889158} step=15390
2022-04-20 15:58.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.54 [info     ] TD3PlusBC_20220420155603: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0004011044028209664, 'time_algorithm_update': 0.008615292303743418, 'critic_loss': 168.79931674087257, 'actor_loss': 2.5679265434979, 'time_step': 0.009078904899240237, 'td_error': 17.219023159676297, 'init_value': -90.00130462646484, 'ave_value': -69.86743653604475} step=15732
2022-04-20 15:58.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.57 [info     ] TD3PlusBC_20220420155603: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003994996087592945, 'time_algorithm_update': 0.009004021945752595, 'critic_loss': 169.48523352996648, 'actor_loss': 2.568352190374631, 'time_step': 0.009468287752385725, 'td_error': 17.64837503127176, 'init_value': -89.09648895263672, 'ave_value': -70.0088921942337} step=16074
2022-04-20 15:58.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.01 [info     ] TD3PlusBC_20220420155603: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00039345479150961713, 'time_algorithm_update': 0.008539542120102554, 'critic_loss': 170.01227917587548, 'actor_loss': 2.569254752488164, 'time_step': 0.008997429184049194, 'td_error': 17.570189018145815, 'init_value': -88.23554229736328, 'ave_value': -70.43499714816475} step=16416
2022-04-20 15:59.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.05 [info     ] TD3PlusBC_20220420155603: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00039827963064985664, 'time_algorithm_update': 0.00904237805751332, 'critic_loss': 170.2979419329013, 'actor_loss': 2.568641792263901, 'time_step': 0.009506328761229041, 'td_error': 16.498708198964295, 'init_value': -85.60253143310547, 'ave_value': -70.10988220591065} step=16758
2022-04-20 15:59.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.09 [info     ] TD3PlusBC_20220420155603: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.000398584276612042, 'time_algorithm_update': 0.00906719868643242, 'critic_loss': 171.2291419938294, 'actor_loss': 2.5685518819686264, 'time_step': 0.009527414862872565, 'td_error': 17.328003385189817, 'init_value': -86.21955871582031, 'ave_value': -70.05026143277954} step=17100
2022-04-20 15:59.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420155603/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:59.10 [info     ] FQE_20220420155909: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016473431185067418, 'time_algorithm_update': 0.004318372312798558, 'loss': 0.00855412289881742, 'time_step': 0.004560609897935247, 'init_value': -0.16602542996406555, 'ave_value': -0.12473410451281312, 'soft_opc': nan} step=166




2022-04-20 15:59.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.11 [info     ] FQE_20220420155909: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016831777181970068, 'time_algorithm_update': 0.004925766623163798, 'loss': 0.005915719663713083, 'time_step': 0.0051667388663234485, 'init_value': -0.2961985766887665, 'ave_value': -0.20010771751529663, 'soft_opc': nan} step=332




2022-04-20 15:59.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.11 [info     ] FQE_20220420155909: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017198309840926206, 'time_algorithm_update': 0.005010965358780091, 'loss': 0.005164414764864437, 'time_step': 0.0052547641547329455, 'init_value': -0.3165355324745178, 'ave_value': -0.19850674043954897, 'soft_opc': nan} step=498




2022-04-20 15:59.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.12 [info     ] FQE_20220420155909: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016718600169721856, 'time_algorithm_update': 0.0050041072339896695, 'loss': 0.004972795507006617, 'time_step': 0.00524695666439562, 'init_value': -0.3876376748085022, 'ave_value': -0.22906344488120495, 'soft_opc': nan} step=664




2022-04-20 15:59.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.13 [info     ] FQE_20220420155909: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016673501715602646, 'time_algorithm_update': 0.005058295755501253, 'loss': 0.0046510552280548826, 'time_step': 0.005294581493699407, 'init_value': -0.4984973073005676, 'ave_value': -0.32196391409028086, 'soft_opc': nan} step=830




2022-04-20 15:59.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.14 [info     ] FQE_20220420155909: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016493682401726045, 'time_algorithm_update': 0.005010803061795522, 'loss': 0.004272486209442996, 'time_step': 0.005246186830911292, 'init_value': -0.4965781569480896, 'ave_value': -0.29794137931999387, 'soft_opc': nan} step=996




2022-04-20 15:59.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.15 [info     ] FQE_20220420155909: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016718312918421733, 'time_algorithm_update': 0.005044292254620288, 'loss': 0.0044136980172605755, 'time_step': 0.0052830627165645, 'init_value': -0.5839935541152954, 'ave_value': -0.3795102701618898, 'soft_opc': nan} step=1162




2022-04-20 15:59.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.16 [info     ] FQE_20220420155909: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001711371433304017, 'time_algorithm_update': 0.005123958530196224, 'loss': 0.0042279169316886064, 'time_step': 0.005368637751383954, 'init_value': -0.6492298245429993, 'ave_value': -0.4236713397690003, 'soft_opc': nan} step=1328




2022-04-20 15:59.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.17 [info     ] FQE_20220420155909: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016447004065456162, 'time_algorithm_update': 0.004988151860524373, 'loss': 0.004008790337149592, 'time_step': 0.005226425377719374, 'init_value': -0.6708317399024963, 'ave_value': -0.44011010026920017, 'soft_opc': nan} step=1494




2022-04-20 15:59.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.18 [info     ] FQE_20220420155909: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001691953245415745, 'time_algorithm_update': 0.004866130380745393, 'loss': 0.004326840407489967, 'time_step': 0.0051095830388816, 'init_value': -0.7156575918197632, 'ave_value': -0.4738427674968366, 'soft_opc': nan} step=1660




2022-04-20 15:59.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.19 [info     ] FQE_20220420155909: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001651623162878565, 'time_algorithm_update': 0.004462427403553423, 'loss': 0.004506762253109321, 'time_step': 0.004697353006845497, 'init_value': -0.7770054936408997, 'ave_value': -0.5409478173415, 'soft_opc': nan} step=1826




2022-04-20 15:59.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.20 [info     ] FQE_20220420155909: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016895977847547415, 'time_algorithm_update': 0.005066901804452919, 'loss': 0.004546336730078132, 'time_step': 0.005306523966501994, 'init_value': -0.7895178198814392, 'ave_value': -0.5297817591309278, 'soft_opc': nan} step=1992




2022-04-20 15:59.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.21 [info     ] FQE_20220420155909: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001676642751119223, 'time_algorithm_update': 0.005032298076583679, 'loss': 0.005124907268783504, 'time_step': 0.005273411072880389, 'init_value': -0.8633453845977783, 'ave_value': -0.5990397888546188, 'soft_opc': nan} step=2158




2022-04-20 15:59.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.22 [info     ] FQE_20220420155909: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016535190214593727, 'time_algorithm_update': 0.0049687308001231, 'loss': 0.005504415327744806, 'time_step': 0.005208118852362575, 'init_value': -0.896057665348053, 'ave_value': -0.6358049029064876, 'soft_opc': nan} step=2324




2022-04-20 15:59.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.23 [info     ] FQE_20220420155909: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001694983746632036, 'time_algorithm_update': 0.0051620049648974315, 'loss': 0.005964892011708075, 'time_step': 0.00541008667773511, 'init_value': -0.9266834259033203, 'ave_value': -0.6438243323786034, 'soft_opc': nan} step=2490




2022-04-20 15:59.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.24 [info     ] FQE_20220420155909: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016941938055566996, 'time_algorithm_update': 0.005183675202978663, 'loss': 0.007367814301950745, 'time_step': 0.005425784961286798, 'init_value': -1.0527997016906738, 'ave_value': -0.7475224796430887, 'soft_opc': nan} step=2656




2022-04-20 15:59.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.25 [info     ] FQE_20220420155909: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00017120321112942984, 'time_algorithm_update': 0.005097324589648879, 'loss': 0.007645723625248949, 'time_step': 0.005342849765915468, 'init_value': -1.1011682748794556, 'ave_value': -0.7974778731194165, 'soft_opc': nan} step=2822




2022-04-20 15:59.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.26 [info     ] FQE_20220420155909: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016955582492322806, 'time_algorithm_update': 0.00504439135631883, 'loss': 0.008058060031785372, 'time_step': 0.0052915308848921075, 'init_value': -1.1225378513336182, 'ave_value': -0.7927981845984186, 'soft_opc': nan} step=2988




2022-04-20 15:59.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.27 [info     ] FQE_20220420155909: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016978706221982656, 'time_algorithm_update': 0.005088992865688829, 'loss': 0.00925767842921462, 'time_step': 0.005333942103098674, 'init_value': -1.242005705833435, 'ave_value': -0.9117570293278576, 'soft_opc': nan} step=3154




2022-04-20 15:59.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.28 [info     ] FQE_20220420155909: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016729515719126505, 'time_algorithm_update': 0.0042002789945487515, 'loss': 0.010137942416946885, 'time_step': 0.004441802760204637, 'init_value': -1.3132565021514893, 'ave_value': -0.9885215489881801, 'soft_opc': nan} step=3320




2022-04-20 15:59.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.29 [info     ] FQE_20220420155909: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016631850277084903, 'time_algorithm_update': 0.005118966102600098, 'loss': 0.011423412402558803, 'time_step': 0.005360204053212361, 'init_value': -1.3727822303771973, 'ave_value': -1.0428376537227484, 'soft_opc': nan} step=3486




2022-04-20 15:59.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.30 [info     ] FQE_20220420155909: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001709561750113246, 'time_algorithm_update': 0.005066601626844291, 'loss': 0.012517237355539867, 'time_step': 0.005316563399441271, 'init_value': -1.5312490463256836, 'ave_value': -1.1744827879542434, 'soft_opc': nan} step=3652




2022-04-20 15:59.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.31 [info     ] FQE_20220420155909: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017065886991569795, 'time_algorithm_update': 0.00509089446929564, 'loss': 0.014128753719767219, 'time_step': 0.00533318088715335, 'init_value': -1.6163742542266846, 'ave_value': -1.2449316251754492, 'soft_opc': nan} step=3818




2022-04-20 15:59.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.32 [info     ] FQE_20220420155909: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016883769667292215, 'time_algorithm_update': 0.00503877990217094, 'loss': 0.014923725006484753, 'time_step': 0.0052825930606888, 'init_value': -1.6772239208221436, 'ave_value': -1.2830791100918375, 'soft_opc': nan} step=3984




2022-04-20 15:59.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.32 [info     ] FQE_20220420155909: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016807073570159544, 'time_algorithm_update': 0.005077965288277132, 'loss': 0.017306009500360407, 'time_step': 0.005319053868213332, 'init_value': -1.7197506427764893, 'ave_value': -1.3262907274570819, 'soft_opc': nan} step=4150




2022-04-20 15:59.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.33 [info     ] FQE_20220420155909: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016996228551290123, 'time_algorithm_update': 0.005039992102657456, 'loss': 0.018728324192497564, 'time_step': 0.005286093217780791, 'init_value': -1.7465862035751343, 'ave_value': -1.3401051615843096, 'soft_opc': nan} step=4316




2022-04-20 15:59.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.34 [info     ] FQE_20220420155909: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016573825514460183, 'time_algorithm_update': 0.005016510745128953, 'loss': 0.019731066961133158, 'time_step': 0.0052577084805592, 'init_value': -1.844570279121399, 'ave_value': -1.4233304402958769, 'soft_opc': nan} step=4482




2022-04-20 15:59.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.35 [info     ] FQE_20220420155909: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017074648156223526, 'time_algorithm_update': 0.0051504833152495235, 'loss': 0.02075900195842519, 'time_step': 0.005394283547458878, 'init_value': -1.8588342666625977, 'ave_value': -1.4537133918898935, 'soft_opc': nan} step=4648




2022-04-20 15:59.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.36 [info     ] FQE_20220420155909: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016274078782782498, 'time_algorithm_update': 0.0034539383578013226, 'loss': 0.02160895506834544, 'time_step': 0.0036866693611604623, 'init_value': -1.908529281616211, 'ave_value': -1.4965458384496872, 'soft_opc': nan} step=4814




2022-04-20 15:59.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.37 [info     ] FQE_20220420155909: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016434939510851022, 'time_algorithm_update': 0.003600466682250241, 'loss': 0.0233296891036219, 'time_step': 0.003838132662945483, 'init_value': -1.931351661682129, 'ave_value': -1.5150192865699126, 'soft_opc': nan} step=4980




2022-04-20 15:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.37 [info     ] FQE_20220420155909: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016253396689173687, 'time_algorithm_update': 0.0034966770424900285, 'loss': 0.024639353180635183, 'time_step': 0.0037294755499046968, 'init_value': -1.9687163829803467, 'ave_value': -1.5435932180869418, 'soft_opc': nan} step=5146




2022-04-20 15:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.38 [info     ] FQE_20220420155909: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016233863600765365, 'time_algorithm_update': 0.0035332039178135885, 'loss': 0.02655343887471897, 'time_step': 0.0037688519581254706, 'init_value': -2.0261764526367188, 'ave_value': -1.595765333112564, 'soft_opc': nan} step=5312




2022-04-20 15:59.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.39 [info     ] FQE_20220420155909: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001612973500447101, 'time_algorithm_update': 0.003523892666920122, 'loss': 0.02757817566470546, 'time_step': 0.003759104085255818, 'init_value': -1.9913357496261597, 'ave_value': -1.5659956429892026, 'soft_opc': nan} step=5478




2022-04-20 15:59.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.40 [info     ] FQE_20220420155909: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016099142741007977, 'time_algorithm_update': 0.0035438393972006188, 'loss': 0.03039047365912786, 'time_step': 0.0037771477756730044, 'init_value': -2.079937219619751, 'ave_value': -1.6700539687312803, 'soft_opc': nan} step=5644




2022-04-20 15:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.40 [info     ] FQE_20220420155909: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.000162233789283109, 'time_algorithm_update': 0.0036297562610672182, 'loss': 0.030663007098188377, 'time_step': 0.003861486193645431, 'init_value': -2.048114538192749, 'ave_value': -1.6257748555241, 'soft_opc': nan} step=5810




2022-04-20 15:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.41 [info     ] FQE_20220420155909: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016189770526196584, 'time_algorithm_update': 0.003568948033344315, 'loss': 0.031959992524864804, 'time_step': 0.0038029845938625105, 'init_value': -2.1648406982421875, 'ave_value': -1.757651931169938, 'soft_opc': nan} step=5976




2022-04-20 15:59.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.42 [info     ] FQE_20220420155909: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016238603247217386, 'time_algorithm_update': 0.003575218729225986, 'loss': 0.032316704409713126, 'time_step': 0.0038117586848247483, 'init_value': -2.1138315200805664, 'ave_value': -1.708518280874233, 'soft_opc': nan} step=6142




2022-04-20 15:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.42 [info     ] FQE_20220420155909: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016390702810632177, 'time_algorithm_update': 0.003510557025311941, 'loss': 0.03350077247939982, 'time_step': 0.0037465928548789888, 'init_value': -2.1104674339294434, 'ave_value': -1.7150364221107248, 'soft_opc': nan} step=6308




2022-04-20 15:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.43 [info     ] FQE_20220420155909: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016405927129538664, 'time_algorithm_update': 0.0036326000489384294, 'loss': 0.035878860810350806, 'time_step': 0.003873293658336961, 'init_value': -2.065883159637451, 'ave_value': -1.673110478837882, 'soft_opc': nan} step=6474




2022-04-20 15:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.44 [info     ] FQE_20220420155909: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016512784613184183, 'time_algorithm_update': 0.0036101183259343527, 'loss': 0.037586157217184196, 'time_step': 0.003850734377481851, 'init_value': -2.2540414333343506, 'ave_value': -1.8689341559595016, 'soft_opc': nan} step=6640




2022-04-20 15:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.44 [info     ] FQE_20220420155909: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016570091247558594, 'time_algorithm_update': 0.0036224026277840854, 'loss': 0.03871245209074366, 'time_step': 0.0038631766675466515, 'init_value': -2.3397724628448486, 'ave_value': -1.9562479073156578, 'soft_opc': nan} step=6806




2022-04-20 15:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.45 [info     ] FQE_20220420155909: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016717738415821488, 'time_algorithm_update': 0.003571467227246388, 'loss': 0.03545787123365732, 'time_step': 0.0038127568830926733, 'init_value': -2.358377695083618, 'ave_value': -1.930409133163289, 'soft_opc': nan} step=6972




2022-04-20 15:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.46 [info     ] FQE_20220420155909: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015953649957495998, 'time_algorithm_update': 0.0034165324934993884, 'loss': 0.04197934830662267, 'time_step': 0.0036494013774825865, 'init_value': -2.2717442512512207, 'ave_value': -1.8464728425634471, 'soft_opc': nan} step=7138




2022-04-20 15:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.46 [info     ] FQE_20220420155909: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016558313944253577, 'time_algorithm_update': 0.003515585359320583, 'loss': 0.04301245527970324, 'time_step': 0.0037535816790109657, 'init_value': -2.419356346130371, 'ave_value': -1.9972807448944665, 'soft_opc': nan} step=7304




2022-04-20 15:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.47 [info     ] FQE_20220420155909: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016264455864228397, 'time_algorithm_update': 0.0034996601472417994, 'loss': 0.04532232758636777, 'time_step': 0.0037351530718516155, 'init_value': -2.348259449005127, 'ave_value': -1.9428718467016477, 'soft_opc': nan} step=7470




2022-04-20 15:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.48 [info     ] FQE_20220420155909: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016230129333863775, 'time_algorithm_update': 0.0035973686769784213, 'loss': 0.04609397275188197, 'time_step': 0.0038314957216561557, 'init_value': -2.4824225902557373, 'ave_value': -2.0274638767513604, 'soft_opc': nan} step=7636




2022-04-20 15:59.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.49 [info     ] FQE_20220420155909: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016187328890145543, 'time_algorithm_update': 0.003573309944336673, 'loss': 0.04818383335130267, 'time_step': 0.0038069802594472126, 'init_value': -2.532834768295288, 'ave_value': -2.0613860097940306, 'soft_opc': nan} step=7802




2022-04-20 15:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.49 [info     ] FQE_20220420155909: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001673238823212773, 'time_algorithm_update': 0.003519815134714885, 'loss': 0.048862575136085815, 'time_step': 0.0037603047956903295, 'init_value': -2.529282808303833, 'ave_value': -2.076551483390299, 'soft_opc': nan} step=7968




2022-04-20 15:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.50 [info     ] FQE_20220420155909: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016775906804096267, 'time_algorithm_update': 0.0035629890051232763, 'loss': 0.04899036055514926, 'time_step': 0.0038055569292551063, 'init_value': -2.497993230819702, 'ave_value': -2.044577752271885, 'soft_opc': nan} step=8134




2022-04-20 15:59.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.51 [info     ] FQE_20220420155909: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016732675483427853, 'time_algorithm_update': 0.003596797046891178, 'loss': 0.05120118914020573, 'time_step': 0.0038369506238454796, 'init_value': -2.5056493282318115, 'ave_value': -2.0363640382088857, 'soft_opc': nan} step=8300




2022-04-20 15:59.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155909/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 15:59.51 [info     ] Directory is created at d3rlpy_logs/FQE_20220420155951
2022-04-20 15:59.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:59.51 [debug    ] Building models...
2022-04-20 15:59.51 [debug    ] Models have been built.
2022-04-20 15:59.51 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420155951/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:59.52 [info     ] FQE_20220420155951: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016205532606257948, 'time_algorithm_update': 0.003561613864676897, 'loss': 0.030628468787167653, 'time_step': 0.0037956591262373815, 'init_value': -1.4209822416305542, 'ave_value': -1.4478614462388528, 'soft_opc': nan} step=344




2022-04-20 15:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:59.54 [info     ] FQE_20220420155951: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016565932783969614, 'time_algorithm_update': 0.0035061711488768113, 'loss': 0.025066493918339528, 'time_step': 0.0037456390469573263, 'init_value': -2.142667293548584, 'ave_value': -2.1935670665121294, 'soft_opc': nan} step=688




2022-04-20 15:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:59.55 [info     ] FQE_20220420155951: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016545556312383606, 'time_algorithm_update': 0.003481930771539378, 'loss': 0.028800785067751137, 'time_step': 0.003718309624250545, 'init_value': -3.0504212379455566, 'ave_value': -3.1318653565537824, 'soft_opc': nan} step=1032




2022-04-20 15:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:59.57 [info     ] FQE_20220420155951: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001638594061829323, 'time_algorithm_update': 0.0034963329171025475, 'loss': 0.03281188083455226, 'time_step': 0.0037336536618166193, 'init_value': -3.6191446781158447, 'ave_value': -3.740099219904021, 'soft_opc': nan} step=1376




2022-04-20 15:59.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:59.58 [info     ] FQE_20220420155951: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001603046128916186, 'time_algorithm_update': 0.0034433922102284987, 'loss': 0.04155567005482437, 'time_step': 0.0036742569402206777, 'init_value': -4.374241352081299, 'ave_value': -4.582362372777215, 'soft_opc': nan} step=1720




2022-04-20 15:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:59.59 [info     ] FQE_20220420155951: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016513536142748455, 'time_algorithm_update': 0.003499526617138885, 'loss': 0.0510435500642458, 'time_step': 0.00373683211415313, 'init_value': -4.984189033508301, 'ave_value': -5.278381986399223, 'soft_opc': nan} step=2064




2022-04-20 15:59.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.01 [info     ] FQE_20220420155951: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016339504441549612, 'time_algorithm_update': 0.003480459368506143, 'loss': 0.06590412483485632, 'time_step': 0.003714420074640318, 'init_value': -5.56361198425293, 'ave_value': -5.965224948871109, 'soft_opc': nan} step=2408




2022-04-20 16:00.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.02 [info     ] FQE_20220420155951: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016232354696406873, 'time_algorithm_update': 0.003490128489427788, 'loss': 0.08157090617203001, 'time_step': 0.003724593062733495, 'init_value': -6.078405857086182, 'ave_value': -6.4934777218946005, 'soft_opc': nan} step=2752




2022-04-20 16:00.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.04 [info     ] FQE_20220420155951: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016329316205756608, 'time_algorithm_update': 0.0034422125927237577, 'loss': 0.09339587037928056, 'time_step': 0.0036783031252927557, 'init_value': -6.666995048522949, 'ave_value': -7.141065891403369, 'soft_opc': nan} step=3096




2022-04-20 16:00.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.05 [info     ] FQE_20220420155951: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016197978064071302, 'time_algorithm_update': 0.0034754269344862117, 'loss': 0.11108841547244337, 'time_step': 0.0037073950434840003, 'init_value': -7.3585205078125, 'ave_value': -7.90333877872059, 'soft_opc': nan} step=3440




2022-04-20 16:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.06 [info     ] FQE_20220420155951: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016285790953525278, 'time_algorithm_update': 0.003488941941150399, 'loss': 0.12297886175774904, 'time_step': 0.003724801678990209, 'init_value': -7.909310340881348, 'ave_value': -8.470482878874135, 'soft_opc': nan} step=3784




2022-04-20 16:00.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.08 [info     ] FQE_20220420155951: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016499951828357785, 'time_algorithm_update': 0.0034955663736476457, 'loss': 0.14563104644591032, 'time_step': 0.003733296033947967, 'init_value': -8.529779434204102, 'ave_value': -9.096203878551496, 'soft_opc': nan} step=4128




2022-04-20 16:00.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.09 [info     ] FQE_20220420155951: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016533566075702046, 'time_algorithm_update': 0.00347592317780783, 'loss': 0.16454311491120174, 'time_step': 0.0037158928638280826, 'init_value': -9.222545623779297, 'ave_value': -9.6895190923416, 'soft_opc': nan} step=4472




2022-04-20 16:00.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.10 [info     ] FQE_20220420155951: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016015837358873943, 'time_algorithm_update': 0.00295290142990822, 'loss': 0.18674451496105554, 'time_step': 0.003183704476023829, 'init_value': -10.117431640625, 'ave_value': -10.535042809268674, 'soft_opc': nan} step=4816




2022-04-20 16:00.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.12 [info     ] FQE_20220420155951: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016279761181321254, 'time_algorithm_update': 0.003485583981802297, 'loss': 0.20316720773942423, 'time_step': 0.003722100063811901, 'init_value': -10.648356437683105, 'ave_value': -10.976457255195852, 'soft_opc': nan} step=5160




2022-04-20 16:00.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.13 [info     ] FQE_20220420155951: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001639557439227437, 'time_algorithm_update': 0.0035738272722377336, 'loss': 0.21729728211323882, 'time_step': 0.003811665052591368, 'init_value': -11.363384246826172, 'ave_value': -11.627436405562518, 'soft_opc': nan} step=5504




2022-04-20 16:00.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.15 [info     ] FQE_20220420155951: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016309009041897085, 'time_algorithm_update': 0.003542780876159668, 'loss': 0.23507802193269653, 'time_step': 0.0037805715272592943, 'init_value': -11.81002426147461, 'ave_value': -11.947277911279249, 'soft_opc': nan} step=5848




2022-04-20 16:00.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.16 [info     ] FQE_20220420155951: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016631636508675508, 'time_algorithm_update': 0.0035587472971095597, 'loss': 0.2467097785150589, 'time_step': 0.0037966010182402853, 'init_value': -12.182251930236816, 'ave_value': -12.188483324546267, 'soft_opc': nan} step=6192




2022-04-20 16:00.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.18 [info     ] FQE_20220420155951: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001682902491369913, 'time_algorithm_update': 0.003499717906463978, 'loss': 0.26022004226200973, 'time_step': 0.003740133934242781, 'init_value': -13.045068740844727, 'ave_value': -12.954534952129396, 'soft_opc': nan} step=6536




2022-04-20 16:00.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.19 [info     ] FQE_20220420155951: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001632335574127907, 'time_algorithm_update': 0.0035413794739301814, 'loss': 0.27762280801462746, 'time_step': 0.003779455672862918, 'init_value': -13.680855751037598, 'ave_value': -13.376958103806015, 'soft_opc': nan} step=6880




2022-04-20 16:00.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.20 [info     ] FQE_20220420155951: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016181344209715377, 'time_algorithm_update': 0.0034990629484487135, 'loss': 0.29326372440947696, 'time_step': 0.003734356442163157, 'init_value': -14.119606018066406, 'ave_value': -13.730869607062726, 'soft_opc': nan} step=7224




2022-04-20 16:00.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.22 [info     ] FQE_20220420155951: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016735182252041128, 'time_algorithm_update': 0.003564264192137607, 'loss': 0.30293719726073187, 'time_step': 0.003804555466008741, 'init_value': -14.583344459533691, 'ave_value': -14.215774944100554, 'soft_opc': nan} step=7568




2022-04-20 16:00.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.23 [info     ] FQE_20220420155951: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016025609748308048, 'time_algorithm_update': 0.00356088682662609, 'loss': 0.31178511951530224, 'time_step': 0.003794162772422613, 'init_value': -14.847987174987793, 'ave_value': -14.493797304084197, 'soft_opc': nan} step=7912




2022-04-20 16:00.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.25 [info     ] FQE_20220420155951: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001636778199395468, 'time_algorithm_update': 0.0035345589005669883, 'loss': 0.3202482766883318, 'time_step': 0.003772208856981854, 'init_value': -15.113211631774902, 'ave_value': -14.5990493166875, 'soft_opc': nan} step=8256




2022-04-20 16:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.26 [info     ] FQE_20220420155951: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016201235527216003, 'time_algorithm_update': 0.003571743881979654, 'loss': 0.33053192127019515, 'time_step': 0.0038089253181634946, 'init_value': -15.472865104675293, 'ave_value': -14.802648050606939, 'soft_opc': nan} step=8600




2022-04-20 16:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.27 [info     ] FQE_20220420155951: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016761311264925225, 'time_algorithm_update': 0.003534380086632662, 'loss': 0.34124938107832054, 'time_step': 0.0037764920744785043, 'init_value': -16.06596565246582, 'ave_value': -15.432254678993655, 'soft_opc': nan} step=8944




2022-04-20 16:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.29 [info     ] FQE_20220420155951: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016373257304346837, 'time_algorithm_update': 0.00350703195083973, 'loss': 0.3558627678958569, 'time_step': 0.003745674393897833, 'init_value': -16.304712295532227, 'ave_value': -15.77094948700386, 'soft_opc': nan} step=9288




2022-04-20 16:00.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.30 [info     ] FQE_20220420155951: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016379356384277344, 'time_algorithm_update': 0.0035234072873758715, 'loss': 0.35363501600574615, 'time_step': 0.003761201403861822, 'init_value': -16.368892669677734, 'ave_value': -15.527186322227742, 'soft_opc': nan} step=9632




2022-04-20 16:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.32 [info     ] FQE_20220420155951: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016144541806952897, 'time_algorithm_update': 0.0035510596840880636, 'loss': 0.35585984110637287, 'time_step': 0.0037861521853957067, 'init_value': -16.201852798461914, 'ave_value': -15.451524950150686, 'soft_opc': nan} step=9976




2022-04-20 16:00.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.33 [info     ] FQE_20220420155951: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.000165149916050046, 'time_algorithm_update': 0.0035063416458839592, 'loss': 0.3600428409896098, 'time_step': 0.003745684790056805, 'init_value': -16.85326385498047, 'ave_value': -15.942745665303088, 'soft_opc': nan} step=10320




2022-04-20 16:00.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.35 [info     ] FQE_20220420155951: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015888103218965753, 'time_algorithm_update': 0.00352057745290357, 'loss': 0.37102841141871934, 'time_step': 0.003753443096959314, 'init_value': -17.089380264282227, 'ave_value': -16.245018675674995, 'soft_opc': nan} step=10664




2022-04-20 16:00.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.36 [info     ] FQE_20220420155951: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016786885815997456, 'time_algorithm_update': 0.0036131074262219804, 'loss': 0.37290156303991584, 'time_step': 0.003854259502056033, 'init_value': -16.782751083374023, 'ave_value': -16.02058383717898, 'soft_opc': nan} step=11008




2022-04-20 16:00.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.37 [info     ] FQE_20220420155951: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016376791998397474, 'time_algorithm_update': 0.0036497164604275727, 'loss': 0.37084086265869787, 'time_step': 0.003886793241944424, 'init_value': -16.918798446655273, 'ave_value': -16.017827709426953, 'soft_opc': nan} step=11352




2022-04-20 16:00.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.39 [info     ] FQE_20220420155951: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016225909077843955, 'time_algorithm_update': 0.0034920205903607743, 'loss': 0.3708701449836236, 'time_step': 0.003728791724803836, 'init_value': -16.847957611083984, 'ave_value': -15.99781229161565, 'soft_opc': nan} step=11696




2022-04-20 16:00.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.40 [info     ] FQE_20220420155951: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001631808835406636, 'time_algorithm_update': 0.0035209433976994002, 'loss': 0.3776774130393426, 'time_step': 0.003757279972697413, 'init_value': -16.993356704711914, 'ave_value': -16.021340883740535, 'soft_opc': nan} step=12040




2022-04-20 16:00.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.42 [info     ] FQE_20220420155951: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016503902368767318, 'time_algorithm_update': 0.0035192418930142426, 'loss': 0.3823792764559648, 'time_step': 0.00375827245934065, 'init_value': -17.11265754699707, 'ave_value': -16.21266343286177, 'soft_opc': nan} step=12384




2022-04-20 16:00.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.43 [info     ] FQE_20220420155951: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016554497009099915, 'time_algorithm_update': 0.003554425267286079, 'loss': 0.3872488058973537, 'time_step': 0.0037939534630886344, 'init_value': -17.20998764038086, 'ave_value': -16.339518376626074, 'soft_opc': nan} step=12728




2022-04-20 16:00.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.45 [info     ] FQE_20220420155951: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016385178233301915, 'time_algorithm_update': 0.003511797550112702, 'loss': 0.3901004298200268, 'time_step': 0.003751494163690611, 'init_value': -17.248075485229492, 'ave_value': -16.338146263190655, 'soft_opc': nan} step=13072




2022-04-20 16:00.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.46 [info     ] FQE_20220420155951: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016343247058779695, 'time_algorithm_update': 0.003518227920975796, 'loss': 0.3948512798998245, 'time_step': 0.003756527290787808, 'init_value': -17.46312713623047, 'ave_value': -16.68537934431219, 'soft_opc': nan} step=13416




2022-04-20 16:00.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.47 [info     ] FQE_20220420155951: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001663787420405898, 'time_algorithm_update': 0.003581112207368363, 'loss': 0.4000040175241613, 'time_step': 0.003822252500888913, 'init_value': -17.76418113708496, 'ave_value': -17.118598393570664, 'soft_opc': nan} step=13760




2022-04-20 16:00.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.49 [info     ] FQE_20220420155951: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016580764637436977, 'time_algorithm_update': 0.003476589225059332, 'loss': 0.4064255655190886, 'time_step': 0.003715879695360051, 'init_value': -17.57046890258789, 'ave_value': -17.0049071012069, 'soft_opc': nan} step=14104




2022-04-20 16:00.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.50 [info     ] FQE_20220420155951: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001663191373958144, 'time_algorithm_update': 0.003516359384669814, 'loss': 0.41405334357757034, 'time_step': 0.003756893928660903, 'init_value': -17.85858917236328, 'ave_value': -17.381806200296776, 'soft_opc': nan} step=14448




2022-04-20 16:00.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.52 [info     ] FQE_20220420155951: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016177393669305847, 'time_algorithm_update': 0.003508898407913918, 'loss': 0.41697188889672765, 'time_step': 0.003744577252587607, 'init_value': -17.590503692626953, 'ave_value': -17.3352597700992, 'soft_opc': nan} step=14792




2022-04-20 16:00.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.53 [info     ] FQE_20220420155951: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016633091970931653, 'time_algorithm_update': 0.0035633326962936757, 'loss': 0.4102076452285008, 'time_step': 0.0038048292315283486, 'init_value': -17.077383041381836, 'ave_value': -17.13081529433, 'soft_opc': nan} step=15136




2022-04-20 16:00.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.54 [info     ] FQE_20220420155951: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001635877198951189, 'time_algorithm_update': 0.003513381924740104, 'loss': 0.40964010040557314, 'time_step': 0.0037488209646801616, 'init_value': -17.453502655029297, 'ave_value': -17.538582733497947, 'soft_opc': nan} step=15480




2022-04-20 16:00.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.56 [info     ] FQE_20220420155951: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016369306763937307, 'time_algorithm_update': 0.003533698791681334, 'loss': 0.41381081765474276, 'time_step': 0.0037710403287133506, 'init_value': -17.334680557250977, 'ave_value': -17.480933946939942, 'soft_opc': nan} step=15824




2022-04-20 16:00.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.57 [info     ] FQE_20220420155951: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016217800073845443, 'time_algorithm_update': 0.003585737805033839, 'loss': 0.4038451579886727, 'time_step': 0.003821167834969454, 'init_value': -16.886606216430664, 'ave_value': -17.442677382053176, 'soft_opc': nan} step=16168




2022-04-20 16:00.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.59 [info     ] FQE_20220420155951: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016255850015684615, 'time_algorithm_update': 0.0034936770450237184, 'loss': 0.3974252705770913, 'time_step': 0.003728976083356281, 'init_value': -16.882732391357422, 'ave_value': -17.465843509330607, 'soft_opc': nan} step=16512




2022-04-20 16:00.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.00 [info     ] FQE_20220420155951: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016169353973033817, 'time_algorithm_update': 0.003527274658513624, 'loss': 0.40031698041292296, 'time_step': 0.003762711619221887, 'init_value': -16.856521606445312, 'ave_value': -17.546952403647204, 'soft_opc': nan} step=16856




2022-04-20 16:01.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.02 [info     ] FQE_20220420155951: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016587556794632312, 'time_algorithm_update': 0.0036003014375997145, 'loss': 0.4018378015553449, 'time_step': 0.0038397180479626323, 'init_value': -16.824424743652344, 'ave_value': -17.549681344962686, 'soft_opc': nan} step=17200




2022-04-20 16:01.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155951/model_17200.pt
most optimal hyper params for td3+bc at this point:  [0.009309517446710828, 0.007281179591166241, 3.635309530650009e-05, 7]
search iteration:  7
using hyper params:  [0.0012009373312255548, 0.007673227118548031, 7.942796560357695e-05, 1]
2022-04-20 16:01.02 [debug    ] RoundIterator is selected.
2022-04-20 16:01.02 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420160102
2022-04-20 16:01.02 [debug    ] Fitting scaler...              scaler=standard


  mean = torch.tensor(self._mean, dtype=torch.float32, device=x.device)
  std = torch.tensor(self._std, dtype=torch.float32, device=x.device)
  minimum = torch.tensor(
  maximum = torch.tensor(


2022-04-20 16:01.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:01.02 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:01.02 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0012009373312255548, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha': 2.5, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.007673227118548031, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.05 [info     ] TD3PlusBC_20220420160102: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003214314667104978, 'time_algorithm_update': 0.006881640668500934, 'critic_loss': 0.34518458790075013, 'actor_loss': 0.05061521886559258, 'time_step': 0.007280484974732873, 'td_error': 0.8049307122873867, 'init_value': -0.4980107247829437, 'ave_value': 0.1577823761974562} step=342
2022-04-20 16:01.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.08 [info     ] TD3PlusBC_20220420160102: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003218344080517864, 'time_algorithm_update': 0.006904937370478759, 'critic_loss': 0.16546623225797685, 'actor_loss': -0.031461270362661595, 'time_step': 0.007305564936141522, 'td_error': 0.8034967399306548, 'init_value': -0.7350252270698547, 'ave_value': 0.25834303301935263} step=684
2022-04-20 16:01.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.11 [info     ] TD3PlusBC_20220420160102: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003223133366010342, 'time_algorithm_update': 0.006933880828277409, 'critic_loss': 0.214024435837715, 'actor_loss': -0.02862369397666022, 'time_step': 0.007336403194226716, 'td_error': 0.8017491146121317, 'init_value': -0.9339361190795898, 'ave_value': 0.35415973865343175} step=1026
2022-04-20 16:01.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.13 [info     ] TD3PlusBC_20220420160102: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00032458946718807107, 'time_algorithm_update': 0.006940058100293254, 'critic_loss': 0.28671472050162433, 'actor_loss': -0.00394363821162815, 'time_step': 0.007343888282775879, 'td_error': 0.8014510890815012, 'init_value': -1.2902042865753174, 'ave_value': 0.3849331048148189} step=1368
2022-04-20 16:01.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.16 [info     ] TD3PlusBC_20220420160102: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00032395577570151166, 'time_algorithm_update': 0.006831510025158263, 'critic_loss': 0.3476248649520832, 'actor_loss': -0.0009543626890545002, 'time_step': 0.0072333980024906624, 'td_error': 0.8048319501031624, 'init_value': -1.5944619178771973, 'ave_value': 0.4109221410583537} step=1710
2022-04-20 16:01.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.19 [info     ] TD3PlusBC_20220420160102: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00031841568082396747, 'time_algorithm_update': 0.006856560707092285, 'critic_loss': 0.41160955073104966, 'actor_loss': 0.012502883827825736, 'time_step': 0.007252960874323259, 'td_error': 0.8071285725143941, 'init_value': -1.8554004430770874, 'ave_value': 0.5199066883249644} step=2052
2022-04-20 16:01.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.22 [info     ] TD3PlusBC_20220420160102: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003240547682109632, 'time_algorithm_update': 0.006851610384489361, 'critic_loss': 0.5085496709447855, 'actor_loss': 0.013140732629431618, 'time_step': 0.007255140800922238, 'td_error': 0.8160476986800251, 'init_value': -2.1753411293029785, 'ave_value': 0.5335925994258843} step=2394
2022-04-20 16:01.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.26 [info     ] TD3PlusBC_20220420160102: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00032720370599400927, 'time_algorithm_update': 0.008105522707888954, 'critic_loss': 0.5923926540111241, 'actor_loss': 0.0062226219135418275, 'time_step': 0.0085120940069009, 'td_error': 0.8278259818691899, 'init_value': -2.5052242279052734, 'ave_value': 0.6137444916976247} step=2736
2022-04-20 16:01.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.29 [info     ] TD3PlusBC_20220420160102: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00032662090502287214, 'time_algorithm_update': 0.008934504804555436, 'critic_loss': 0.6777234060896767, 'actor_loss': 0.04447993320244097, 'time_step': 0.009341292911105685, 'td_error': 0.8427308960057702, 'init_value': -2.8111767768859863, 'ave_value': 0.6396440259597961} step=3078
2022-04-20 16:01.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.33 [info     ] TD3PlusBC_20220420160102: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00031878097712645056, 'time_algorithm_update': 0.008625963975114432, 'critic_loss': 0.7553591145670902, 'actor_loss': 0.031235508773236248, 'time_step': 0.00902322718971654, 'td_error': 0.8607389759119073, 'init_value': -3.235196590423584, 'ave_value': 0.6896199247864838} step=3420
2022-04-20 16:01.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.37 [info     ] TD3PlusBC_20220420160102: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032371665999206186, 'time_algorithm_update': 0.008915612572117856, 'critic_loss': 0.8452260487696581, 'actor_loss': 0.03701916130052672, 'time_step': 0.009318752595555712, 'td_error': 0.876578536227671, 'init_value': -3.4178218841552734, 'ave_value': 0.793415036309586} step=3762
2022-04-20 16:01.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.40 [info     ] TD3PlusBC_20220420160102: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003225880059582448, 'time_algorithm_update': 0.008927058755305776, 'critic_loss': 0.9790593871944829, 'actor_loss': 0.03565737145424586, 'time_step': 0.00933004819858841, 'td_error': 0.8990213554891634, 'init_value': -3.6314594745635986, 'ave_value': 0.8470322964389742} step=4104
2022-04-20 16:01.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.44 [info     ] TD3PlusBC_20220420160102: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003262521230686478, 'time_algorithm_update': 0.008582108202036361, 'critic_loss': 1.1127312756769838, 'actor_loss': 0.05478362299022619, 'time_step': 0.00898640685611301, 'td_error': 0.9103779816390455, 'init_value': -4.007298946380615, 'ave_value': 0.902461212816279} step=4446
2022-04-20 16:01.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.48 [info     ] TD3PlusBC_20220420160102: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003224374258030228, 'time_algorithm_update': 0.008821108884978713, 'critic_loss': 1.1946717478901323, 'actor_loss': 0.04813826356453505, 'time_step': 0.00922395402227926, 'td_error': 0.9467585006394296, 'init_value': -4.300079345703125, 'ave_value': 0.9383941203962827} step=4788
2022-04-20 16:01.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.51 [info     ] TD3PlusBC_20220420160102: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003241035673353407, 'time_algorithm_update': 0.008754060979475054, 'critic_loss': 1.3129392146889927, 'actor_loss': 0.043726360327319094, 'time_step': 0.00915718148326316, 'td_error': 0.9683140155122693, 'init_value': -4.586923122406006, 'ave_value': 1.0191460139470527} step=5130
2022-04-20 16:01.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.55 [info     ] TD3PlusBC_20220420160102: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003227009410746613, 'time_algorithm_update': 0.00896512834649337, 'critic_loss': 1.4810439962566944, 'actor_loss': 0.0415850562092505, 'time_step': 0.00936588488127056, 'td_error': 0.9885539051278901, 'init_value': -5.0247321128845215, 'ave_value': 1.0396433682210307} step=5472
2022-04-20 16:01.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.59 [info     ] TD3PlusBC_20220420160102: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00033004799781487004, 'time_algorithm_update': 0.008915902578342728, 'critic_loss': 1.5803893334858599, 'actor_loss': 0.06524032997021899, 'time_step': 0.009323652724773562, 'td_error': 1.0117176443341875, 'init_value': -5.160798072814941, 'ave_value': 1.1636885256714582} step=5814
2022-04-20 16:01.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.02 [info     ] TD3PlusBC_20220420160102: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00033429631015710663, 'time_algorithm_update': 0.008434204330221254, 'critic_loss': 1.805222855412472, 'actor_loss': 0.05029227991985996, 'time_step': 0.008847777606451024, 'td_error': 1.0363680559545145, 'init_value': -5.57025146484375, 'ave_value': 1.1903173614079385} step=6156
2022-04-20 16:02.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.06 [info     ] TD3PlusBC_20220420160102: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003260597150925307, 'time_algorithm_update': 0.008963395280447619, 'critic_loss': 1.8955626929538292, 'actor_loss': 0.06825344597822741, 'time_step': 0.009367256136665567, 'td_error': 1.0538187624519653, 'init_value': -5.974595069885254, 'ave_value': 1.2118187326441208} step=6498
2022-04-20 16:02.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.09 [info     ] TD3PlusBC_20220420160102: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003267108348377964, 'time_algorithm_update': 0.008917681654991462, 'critic_loss': 2.0725739884097676, 'actor_loss': 0.061381015752316914, 'time_step': 0.009324001289947688, 'td_error': 1.0888495933851692, 'init_value': -6.199249267578125, 'ave_value': 1.2551912120902944} step=6840
2022-04-20 16:02.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.13 [info     ] TD3PlusBC_20220420160102: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003241314525492707, 'time_algorithm_update': 0.008818989608720032, 'critic_loss': 2.3657321377455838, 'actor_loss': 0.05468565127567241, 'time_step': 0.009221453415720086, 'td_error': 1.1092992936495285, 'init_value': -6.693446159362793, 'ave_value': 1.27476412332063} step=7182
2022-04-20 16:02.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.17 [info     ] TD3PlusBC_20220420160102: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003242890040079753, 'time_algorithm_update': 0.008934633773669862, 'critic_loss': 2.486781087012319, 'actor_loss': 0.06169032704150468, 'time_step': 0.009338311284606219, 'td_error': 1.1604064490218937, 'init_value': -6.851449489593506, 'ave_value': 1.4214598150396938} step=7524
2022-04-20 16:02.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.20 [info     ] TD3PlusBC_20220420160102: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032519875911244176, 'time_algorithm_update': 0.008517820235581426, 'critic_loss': 2.7950595375041516, 'actor_loss': 0.06175995811994313, 'time_step': 0.008923304708380448, 'td_error': 1.174340684100491, 'init_value': -7.286149501800537, 'ave_value': 1.451562947434885} step=7866
2022-04-20 16:02.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.24 [info     ] TD3PlusBC_20220420160102: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00033243078934518913, 'time_algorithm_update': 0.008877423074510362, 'critic_loss': 2.822149618675834, 'actor_loss': 0.04974394853700671, 'time_step': 0.009289325329295377, 'td_error': 1.218705441784284, 'init_value': -7.486911773681641, 'ave_value': 1.4945251218407407} step=8208
2022-04-20 16:02.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.28 [info     ] TD3PlusBC_20220420160102: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032017175217120966, 'time_algorithm_update': 0.008865347382617973, 'critic_loss': 3.0171121909604435, 'actor_loss': 0.050327118573307295, 'time_step': 0.00926406899390862, 'td_error': 1.234295484934991, 'init_value': -7.905189514160156, 'ave_value': 1.5343551398077477} step=8550
2022-04-20 16:02.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.31 [info     ] TD3PlusBC_20220420160102: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003237020202547486, 'time_algorithm_update': 0.008711174217581052, 'critic_loss': 3.414972075419119, 'actor_loss': 0.06233164271590305, 'time_step': 0.00911411486173931, 'td_error': 1.2851083665812673, 'init_value': -8.386159896850586, 'ave_value': 1.53689951714682} step=8892
2022-04-20 16:02.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.35 [info     ] TD3PlusBC_20220420160102: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00032567699053134136, 'time_algorithm_update': 0.008880717015405845, 'critic_loss': 3.497533706545133, 'actor_loss': 0.06027072090764492, 'time_step': 0.009286121318214819, 'td_error': 1.3187498987627604, 'init_value': -8.552881240844727, 'ave_value': 1.5998379705895813} step=9234
2022-04-20 16:02.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.39 [info     ] TD3PlusBC_20220420160102: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00032959974300094514, 'time_algorithm_update': 0.008490972351609614, 'critic_loss': 3.7629034161916253, 'actor_loss': 0.07045082466905578, 'time_step': 0.008900682131449381, 'td_error': 1.323144829233918, 'init_value': -9.182953834533691, 'ave_value': 1.6359268236250055} step=9576
2022-04-20 16:02.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.42 [info     ] TD3PlusBC_20220420160102: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00032644801669650606, 'time_algorithm_update': 0.008887124340436613, 'critic_loss': 4.0219476393788876, 'actor_loss': 0.06833815287079728, 'time_step': 0.009293082861872444, 'td_error': 1.37646729150203, 'init_value': -9.302325248718262, 'ave_value': 1.693949909391016} step=9918
2022-04-20 16:02.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.46 [info     ] TD3PlusBC_20220420160102: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000326185895685564, 'time_algorithm_update': 0.008917678169339721, 'critic_loss': 4.182842639803189, 'actor_loss': 0.06846342667153008, 'time_step': 0.009324549234401413, 'td_error': 1.4156119071160471, 'init_value': -9.858869552612305, 'ave_value': 1.7199465003630634} step=10260
2022-04-20 16:02.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.50 [info     ] TD3PlusBC_20220420160102: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00033006542607357626, 'time_algorithm_update': 0.00870556998671147, 'critic_loss': 4.4529920040864, 'actor_loss': 0.05875352702555601, 'time_step': 0.009116506715964156, 'td_error': 1.4300124014507807, 'init_value': -10.355581283569336, 'ave_value': 1.7492383728767031} step=10602
2022-04-20 16:02.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.54 [info     ] TD3PlusBC_20220420160102: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00033129725539893435, 'time_algorithm_update': 0.008908513693781624, 'critic_loss': 4.7054575279093624, 'actor_loss': 0.06775318744064074, 'time_step': 0.009323072015193471, 'td_error': 1.5004503784308714, 'init_value': -10.286239624023438, 'ave_value': 1.8979182870263183} step=10944
2022-04-20 16:02.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.57 [info     ] TD3PlusBC_20220420160102: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00032365252400002285, 'time_algorithm_update': 0.0085966740435327, 'critic_loss': 4.859158571003474, 'actor_loss': 0.06122672634079442, 'time_step': 0.008997855130691974, 'td_error': 1.547057898260276, 'init_value': -10.878252029418945, 'ave_value': 1.8062419562200336} step=11286
2022-04-20 16:02.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.01 [info     ] TD3PlusBC_20220420160102: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00032432525478608425, 'time_algorithm_update': 0.008842417371203328, 'critic_loss': 5.134073232698162, 'actor_loss': 0.07507791025945318, 'time_step': 0.009245855069299888, 'td_error': 1.5887813878980301, 'init_value': -11.289068222045898, 'ave_value': 1.8416175318091503} step=11628
2022-04-20 16:03.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.05 [info     ] TD3PlusBC_20220420160102: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00032507885269254273, 'time_algorithm_update': 0.008955260466413888, 'critic_loss': 5.449137591130552, 'actor_loss': 0.0677774738934305, 'time_step': 0.00935797593746966, 'td_error': 1.64417099398676, 'init_value': -11.416781425476074, 'ave_value': 1.891856481062819} step=11970
2022-04-20 16:03.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.08 [info     ] TD3PlusBC_20220420160102: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003251137092099552, 'time_algorithm_update': 0.008542813055696543, 'critic_loss': 5.5593707015988425, 'actor_loss': 0.08401349984240114, 'time_step': 0.008946560279667726, 'td_error': 1.6771950753327196, 'init_value': -11.740094184875488, 'ave_value': 1.9658389462985366} step=12312
2022-04-20 16:03.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.12 [info     ] TD3PlusBC_20220420160102: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003249742831403052, 'time_algorithm_update': 0.008942148838823999, 'critic_loss': 6.094763153129154, 'actor_loss': 0.05251717994436186, 'time_step': 0.00934422713274147, 'td_error': 1.737961229987045, 'init_value': -12.077054977416992, 'ave_value': 2.0226435073524263} step=12654
2022-04-20 16:03.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.15 [info     ] TD3PlusBC_20220420160102: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003234433848955478, 'time_algorithm_update': 0.008521310767235115, 'critic_loss': 6.192297989862007, 'actor_loss': 0.0668623085128285, 'time_step': 0.008922688445152595, 'td_error': 1.7896803881958423, 'init_value': -12.704442977905273, 'ave_value': 2.0554846566003486} step=12996
2022-04-20 16:03.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.19 [info     ] TD3PlusBC_20220420160102: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003281880540457385, 'time_algorithm_update': 0.008942061697530467, 'critic_loss': 6.513570535252666, 'actor_loss': 0.0726819635308974, 'time_step': 0.00934859047159117, 'td_error': 1.8602164549367457, 'init_value': -13.563993453979492, 'ave_value': 1.9165581552167466} step=13338
2022-04-20 16:03.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.23 [info     ] TD3PlusBC_20220420160102: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032200869063884894, 'time_algorithm_update': 0.00887283037977609, 'critic_loss': 6.828686419111944, 'actor_loss': 0.07379246788502436, 'time_step': 0.00927291348663687, 'td_error': 1.894293439118429, 'init_value': -13.5552978515625, 'ave_value': 2.1387485266721864} step=13680
2022-04-20 16:03.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.26 [info     ] TD3PlusBC_20220420160102: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00032690742559600297, 'time_algorithm_update': 0.00843195887336954, 'critic_loss': 7.136350943679698, 'actor_loss': 0.061117401597095514, 'time_step': 0.008838848063820287, 'td_error': 1.9377119262635187, 'init_value': -13.73351764678955, 'ave_value': 2.1999438360581793} step=14022
2022-04-20 16:03.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.30 [info     ] TD3PlusBC_20220420160102: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033430606998198216, 'time_algorithm_update': 0.00885191437793754, 'critic_loss': 7.34590416623835, 'actor_loss': 0.10069426571765142, 'time_step': 0.009264509580288714, 'td_error': 2.0176246634963046, 'init_value': -13.914093017578125, 'ave_value': 2.197773984365652} step=14364
2022-04-20 16:03.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.34 [info     ] TD3PlusBC_20220420160102: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032326910230848525, 'time_algorithm_update': 0.008392427399841666, 'critic_loss': 7.705250801050175, 'actor_loss': 0.07587915338706552, 'time_step': 0.00879644859603971, 'td_error': 2.066269348733737, 'init_value': -14.5802640914917, 'ave_value': 2.163654876310412} step=14706
2022-04-20 16:03.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.37 [info     ] TD3PlusBC_20220420160102: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003305032239322774, 'time_algorithm_update': 0.008935632761458905, 'critic_loss': 8.03412072316945, 'actor_loss': 0.05264224580418297, 'time_step': 0.00934337523945591, 'td_error': 2.118366142245684, 'init_value': -14.864501953125, 'ave_value': 2.1329899159112427} step=15048
2022-04-20 16:03.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.41 [info     ] TD3PlusBC_20220420160102: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003319699861849958, 'time_algorithm_update': 0.008991375304104989, 'critic_loss': 8.329625828572881, 'actor_loss': 0.07890820490163669, 'time_step': 0.009402942239192495, 'td_error': 2.197458152320113, 'init_value': -15.207555770874023, 'ave_value': 2.2913786252551955} step=15390
2022-04-20 16:03.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.45 [info     ] TD3PlusBC_20220420160102: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00032337994603385704, 'time_algorithm_update': 0.008518961437961511, 'critic_loss': 8.683918874863295, 'actor_loss': 0.07878147980623078, 'time_step': 0.008921623229980469, 'td_error': 2.267823606352336, 'init_value': -15.30566692352295, 'ave_value': 2.400957591411498} step=15732
2022-04-20 16:03.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.48 [info     ] TD3PlusBC_20220420160102: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003269450706348085, 'time_algorithm_update': 0.008931256874262938, 'critic_loss': 8.92541386823208, 'actor_loss': 0.08740829419322878, 'time_step': 0.009338064500462939, 'td_error': 2.317569584340028, 'init_value': -15.853841781616211, 'ave_value': 2.349465339273248} step=16074
2022-04-20 16:03.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.52 [info     ] TD3PlusBC_20220420160102: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00032365252400002285, 'time_algorithm_update': 0.008483469834801747, 'critic_loss': 9.263364741676732, 'actor_loss': 0.09060976355832223, 'time_step': 0.008887303502936112, 'td_error': 2.396764854395445, 'init_value': -16.290136337280273, 'ave_value': 2.348378702667416} step=16416
2022-04-20 16:03.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.56 [info     ] TD3PlusBC_20220420160102: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032680215891341715, 'time_algorithm_update': 0.008982408116435447, 'critic_loss': 9.714454603125477, 'actor_loss': 0.08830730889362899, 'time_step': 0.009386936823527018, 'td_error': 2.455767070802192, 'init_value': -16.50439453125, 'ave_value': 2.451633053749783} step=16758
2022-04-20 16:03.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.59 [info     ] TD3PlusBC_20220420160102: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032333323830052425, 'time_algorithm_update': 0.008903126967580695, 'critic_loss': 9.934374481613872, 'actor_loss': 0.067507094066394, 'time_step': 0.009305558009454382, 'td_error': 2.5180551944491927, 'init_value': -17.203887939453125, 'ave_value': 2.3478645567797325} step=17100
2022-04-20 16:03.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160102/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:04.00 [info     ] FQE_20220420160400: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001589289630752012, 'time_algorithm_update': 0.0042113783847854795, 'loss': 0.007280851149067552, 'time_step': 0.00444559016859675, 'init_value': -0.32643118500709534, 'ave_value': -0.2903456049560158, 'soft_opc': nan} step=166




2022-04-20 16:04.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.01 [info     ] FQE_20220420160400: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016273935157132437, 'time_algorithm_update': 0.005119168614766684, 'loss': 0.005169394950618048, 'time_step': 0.0053577796522393285, 'init_value': -0.4964022934436798, 'ave_value': -0.4129939686523767, 'soft_opc': nan} step=332




2022-04-20 16:04.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.02 [info     ] FQE_20220420160400: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001633397067885801, 'time_algorithm_update': 0.005084764526551028, 'loss': 0.004705412945912096, 'time_step': 0.005316844905715391, 'init_value': -0.5481293797492981, 'ave_value': -0.4149177723463524, 'soft_opc': nan} step=498




2022-04-20 16:04.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.03 [info     ] FQE_20220420160400: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016515226249235222, 'time_algorithm_update': 0.005166440124971321, 'loss': 0.00465092964537711, 'time_step': 0.005405177553016019, 'init_value': -0.6313756704330444, 'ave_value': -0.47639562051726486, 'soft_opc': nan} step=664




2022-04-20 16:04.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.04 [info     ] FQE_20220420160400: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001560262886874647, 'time_algorithm_update': 0.005026027380702007, 'loss': 0.004247971918400243, 'time_step': 0.00524993976914739, 'init_value': -0.7262439727783203, 'ave_value': -0.5318121828831027, 'soft_opc': nan} step=830




2022-04-20 16:04.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.05 [info     ] FQE_20220420160400: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016377201999526425, 'time_algorithm_update': 0.005113189478954637, 'loss': 0.0040274768259870005, 'time_step': 0.005351172872336514, 'init_value': -0.7385789155960083, 'ave_value': -0.523139315353589, 'soft_opc': nan} step=996




2022-04-20 16:04.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.06 [info     ] FQE_20220420160400: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001645432897360928, 'time_algorithm_update': 0.00502234769154744, 'loss': 0.003967671963689198, 'time_step': 0.0052545731326183644, 'init_value': -0.8091561198234558, 'ave_value': -0.5535215830795366, 'soft_opc': nan} step=1162




2022-04-20 16:04.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.07 [info     ] FQE_20220420160400: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001666933657175087, 'time_algorithm_update': 0.005013059420757983, 'loss': 0.0038389279696450806, 'time_step': 0.005250935094902314, 'init_value': -0.9230307340621948, 'ave_value': -0.640877063234278, 'soft_opc': nan} step=1328




2022-04-20 16:04.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.08 [info     ] FQE_20220420160400: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016148406338978963, 'time_algorithm_update': 0.005101629050381212, 'loss': 0.0036196622812379913, 'time_step': 0.005336893610207431, 'init_value': -0.9573962688446045, 'ave_value': -0.6556151858782647, 'soft_opc': nan} step=1494




2022-04-20 16:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.09 [info     ] FQE_20220420160400: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016280685562685313, 'time_algorithm_update': 0.0044666370713567155, 'loss': 0.0036605252696380736, 'time_step': 0.00470076267977795, 'init_value': -1.0455784797668457, 'ave_value': -0.728838854585984, 'soft_opc': nan} step=1660




2022-04-20 16:04.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.10 [info     ] FQE_20220420160400: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015983380467058663, 'time_algorithm_update': 0.005065038979771626, 'loss': 0.003543911700933633, 'time_step': 0.0052984134260430395, 'init_value': -1.1596577167510986, 'ave_value': -0.7960587406489025, 'soft_opc': nan} step=1826




2022-04-20 16:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.11 [info     ] FQE_20220420160400: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015920472432331867, 'time_algorithm_update': 0.004998422530760248, 'loss': 0.0035245291193027094, 'time_step': 0.0052309754383133115, 'init_value': -1.2028741836547852, 'ave_value': -0.8166835626931274, 'soft_opc': nan} step=1992




2022-04-20 16:04.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.12 [info     ] FQE_20220420160400: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016323629632053604, 'time_algorithm_update': 0.0050342987818890305, 'loss': 0.0035477406794137984, 'time_step': 0.0052745270441813645, 'init_value': -1.2951855659484863, 'ave_value': -0.8672288076196973, 'soft_opc': nan} step=2158




2022-04-20 16:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.13 [info     ] FQE_20220420160400: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016270200890230844, 'time_algorithm_update': 0.005017691347972456, 'loss': 0.0035963534175541744, 'time_step': 0.005254104912999165, 'init_value': -1.3967828750610352, 'ave_value': -0.9435114613079743, 'soft_opc': nan} step=2324




2022-04-20 16:04.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.14 [info     ] FQE_20220420160400: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016496267663427145, 'time_algorithm_update': 0.005053534565201725, 'loss': 0.003924712407576616, 'time_step': 0.00529402566243367, 'init_value': -1.459424614906311, 'ave_value': -0.9859843121489158, 'soft_opc': nan} step=2490




2022-04-20 16:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.15 [info     ] FQE_20220420160400: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001894824476127165, 'time_algorithm_update': 0.0063864825719810395, 'loss': 0.004077412082157266, 'time_step': 0.006648172815162015, 'init_value': -1.573957920074463, 'ave_value': -1.0499602718631158, 'soft_opc': nan} step=2656




2022-04-20 16:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.16 [info     ] FQE_20220420160400: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001590869512902685, 'time_algorithm_update': 0.005070192268095821, 'loss': 0.004084832168602194, 'time_step': 0.005303734756377806, 'init_value': -1.6340463161468506, 'ave_value': -1.0984546337511625, 'soft_opc': nan} step=2822




2022-04-20 16:04.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.17 [info     ] FQE_20220420160400: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001598898186741105, 'time_algorithm_update': 0.004969181784664292, 'loss': 0.004311023110310626, 'time_step': 0.0052022977047655955, 'init_value': -1.7282482385635376, 'ave_value': -1.1697120978857751, 'soft_opc': nan} step=2988




2022-04-20 16:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.18 [info     ] FQE_20220420160400: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016183881874544075, 'time_algorithm_update': 0.004151621496821025, 'loss': 0.00472723748022971, 'time_step': 0.004384605281324272, 'init_value': -1.8338872194290161, 'ave_value': -1.2511979768740702, 'soft_opc': nan} step=3154




2022-04-20 16:04.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.19 [info     ] FQE_20220420160400: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016689444162759436, 'time_algorithm_update': 0.005168037242200001, 'loss': 0.005208028410280864, 'time_step': 0.0054085211581494435, 'init_value': -1.9166615009307861, 'ave_value': -1.3164723125697524, 'soft_opc': nan} step=3320




2022-04-20 16:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.20 [info     ] FQE_20220420160400: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016530881445091892, 'time_algorithm_update': 0.005152481148041874, 'loss': 0.005901921791212454, 'time_step': 0.005388532776430428, 'init_value': -2.094564437866211, 'ave_value': -1.462742377542429, 'soft_opc': nan} step=3486




2022-04-20 16:04.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.21 [info     ] FQE_20220420160400: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001615515674453184, 'time_algorithm_update': 0.0050846381359789744, 'loss': 0.0060856587454081925, 'time_step': 0.005319715982460114, 'init_value': -2.1484694480895996, 'ave_value': -1.505235532919566, 'soft_opc': nan} step=3652




2022-04-20 16:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.22 [info     ] FQE_20220420160400: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016268908259380296, 'time_algorithm_update': 0.005122747765966208, 'loss': 0.006327319025561348, 'time_step': 0.005359069410576878, 'init_value': -2.2137930393218994, 'ave_value': -1.5562768394681248, 'soft_opc': nan} step=3818




2022-04-20 16:04.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.23 [info     ] FQE_20220420160400: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001661921121987952, 'time_algorithm_update': 0.005044185971639243, 'loss': 0.006914649330533996, 'time_step': 0.005284260554486011, 'init_value': -2.2719855308532715, 'ave_value': -1.5885466359555722, 'soft_opc': nan} step=3984




2022-04-20 16:04.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.24 [info     ] FQE_20220420160400: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016220506415309677, 'time_algorithm_update': 0.005021267626658979, 'loss': 0.007456919244989887, 'time_step': 0.005259623010474515, 'init_value': -2.3317055702209473, 'ave_value': -1.6241381636879466, 'soft_opc': nan} step=4150




2022-04-20 16:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.25 [info     ] FQE_20220420160400: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016662011663597752, 'time_algorithm_update': 0.005024107105760689, 'loss': 0.00751862668710009, 'time_step': 0.005266149360013296, 'init_value': -2.418680191040039, 'ave_value': -1.6720019008863616, 'soft_opc': nan} step=4316




2022-04-20 16:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.26 [info     ] FQE_20220420160400: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001621145799935582, 'time_algorithm_update': 0.0050712378628282664, 'loss': 0.007986800784752873, 'time_step': 0.005309508507510266, 'init_value': -2.5437214374542236, 'ave_value': -1.7382878414413951, 'soft_opc': nan} step=4482




2022-04-20 16:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.26 [info     ] FQE_20220420160400: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015581085021237293, 'time_algorithm_update': 0.0043961901262582065, 'loss': 0.008832638829046053, 'time_step': 0.004623831036579178, 'init_value': -2.6548328399658203, 'ave_value': -1.848377533438238, 'soft_opc': nan} step=4648




2022-04-20 16:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.27 [info     ] FQE_20220420160400: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016334114304508072, 'time_algorithm_update': 0.005114131663219038, 'loss': 0.009719239875003808, 'time_step': 0.005350733377847327, 'init_value': -2.819275379180908, 'ave_value': -1.9950559400760375, 'soft_opc': nan} step=4814




2022-04-20 16:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.28 [info     ] FQE_20220420160400: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016356663531567677, 'time_algorithm_update': 0.005053942462047899, 'loss': 0.010224514720114285, 'time_step': 0.00529158402638263, 'init_value': -2.871865749359131, 'ave_value': -2.001694924834075, 'soft_opc': nan} step=4980




2022-04-20 16:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.29 [info     ] FQE_20220420160400: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016049017389136623, 'time_algorithm_update': 0.005190286291650979, 'loss': 0.01028321702622371, 'time_step': 0.0054250639605234905, 'init_value': -2.992804765701294, 'ave_value': -2.10285004797551, 'soft_opc': nan} step=5146




2022-04-20 16:04.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.30 [info     ] FQE_20220420160400: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015970023281602975, 'time_algorithm_update': 0.005096704126840614, 'loss': 0.01100776106692817, 'time_step': 0.005331102623996964, 'init_value': -3.1227221488952637, 'ave_value': -2.2213313769381324, 'soft_opc': nan} step=5312




2022-04-20 16:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.31 [info     ] FQE_20220420160400: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001633267804800746, 'time_algorithm_update': 0.0050546376101941945, 'loss': 0.011910654578837622, 'time_step': 0.005292139857648367, 'init_value': -3.16939640045166, 'ave_value': -2.250692245646103, 'soft_opc': nan} step=5478




2022-04-20 16:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.32 [info     ] FQE_20220420160400: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001599961016551558, 'time_algorithm_update': 0.005020683070263231, 'loss': 0.012921375716918608, 'time_step': 0.005254960921873529, 'init_value': -3.3168282508850098, 'ave_value': -2.3757130955522126, 'soft_opc': nan} step=5644




2022-04-20 16:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.33 [info     ] FQE_20220420160400: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015822376113340078, 'time_algorithm_update': 0.005108115184737976, 'loss': 0.013666634698937008, 'time_step': 0.005340324826987393, 'init_value': -3.3682713508605957, 'ave_value': -2.414164032350789, 'soft_opc': nan} step=5810




2022-04-20 16:04.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.34 [info     ] FQE_20220420160400: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016039538096232587, 'time_algorithm_update': 0.005063227860324354, 'loss': 0.013649314454706183, 'time_step': 0.0052997534533581105, 'init_value': -3.4036879539489746, 'ave_value': -2.4564731828532778, 'soft_opc': nan} step=5976




2022-04-20 16:04.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.35 [info     ] FQE_20220420160400: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016034654824130507, 'time_algorithm_update': 0.0046938700848315135, 'loss': 0.014166019982111314, 'time_step': 0.004922497703368406, 'init_value': -3.454591751098633, 'ave_value': -2.4771107901055536, 'soft_opc': nan} step=6142




2022-04-20 16:04.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.36 [info     ] FQE_20220420160400: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001573232283075172, 'time_algorithm_update': 0.004679790462356016, 'loss': 0.014823978406332254, 'time_step': 0.004912520029458655, 'init_value': -3.545058250427246, 'ave_value': -2.5642919878970396, 'soft_opc': nan} step=6308




2022-04-20 16:04.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.37 [info     ] FQE_20220420160400: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016208154609404415, 'time_algorithm_update': 0.005119961428355022, 'loss': 0.015102578230982041, 'time_step': 0.005354312529046851, 'init_value': -3.6094632148742676, 'ave_value': -2.63749693578428, 'soft_opc': nan} step=6474




2022-04-20 16:04.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.38 [info     ] FQE_20220420160400: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016078747898699288, 'time_algorithm_update': 0.005105179476450725, 'loss': 0.01624145348351561, 'time_step': 0.0053419262529855755, 'init_value': -3.6399617195129395, 'ave_value': -2.6771682448483802, 'soft_opc': nan} step=6640




2022-04-20 16:04.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.39 [info     ] FQE_20220420160400: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016601832516222116, 'time_algorithm_update': 0.005122268056295004, 'loss': 0.016384912665978253, 'time_step': 0.005361455032624394, 'init_value': -3.692312240600586, 'ave_value': -2.716830356231144, 'soft_opc': nan} step=6806




2022-04-20 16:04.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.40 [info     ] FQE_20220420160400: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016108047531311772, 'time_algorithm_update': 0.005081867597189294, 'loss': 0.017261455202312488, 'time_step': 0.0053156068526118636, 'init_value': -3.816990375518799, 'ave_value': -2.797988767505766, 'soft_opc': nan} step=6972




2022-04-20 16:04.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.41 [info     ] FQE_20220420160400: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016100291746208467, 'time_algorithm_update': 0.005087048174387001, 'loss': 0.017708430282002967, 'time_step': 0.00531893178641078, 'init_value': -3.8526883125305176, 'ave_value': -2.802452505225534, 'soft_opc': nan} step=7138




2022-04-20 16:04.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.42 [info     ] FQE_20220420160400: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016611886311726398, 'time_algorithm_update': 0.005087043865617499, 'loss': 0.018074981204138686, 'time_step': 0.005327214677649808, 'init_value': -3.939307689666748, 'ave_value': -2.8748847285637984, 'soft_opc': nan} step=7304




2022-04-20 16:04.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.43 [info     ] FQE_20220420160400: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016170955566038568, 'time_algorithm_update': 0.005153462111231792, 'loss': 0.01897489293113755, 'time_step': 0.005390139947454613, 'init_value': -3.9660301208496094, 'ave_value': -2.864334780850389, 'soft_opc': nan} step=7470




2022-04-20 16:04.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.44 [info     ] FQE_20220420160400: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016255838325224728, 'time_algorithm_update': 0.004828237625489752, 'loss': 0.019556743592509147, 'time_step': 0.005062547074743064, 'init_value': -4.081170082092285, 'ave_value': -2.984312324494392, 'soft_opc': nan} step=7636




2022-04-20 16:04.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.44 [info     ] FQE_20220420160400: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015944745167192207, 'time_algorithm_update': 0.004587094467806529, 'loss': 0.020551121764934163, 'time_step': 0.00481722153813006, 'init_value': -4.220184803009033, 'ave_value': -3.072901019064693, 'soft_opc': nan} step=7802




2022-04-20 16:04.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.45 [info     ] FQE_20220420160400: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016650234360292735, 'time_algorithm_update': 0.005142591085778661, 'loss': 0.02119625277932262, 'time_step': 0.005385855594313288, 'init_value': -4.290848255157471, 'ave_value': -3.0871900636065113, 'soft_opc': nan} step=7968




2022-04-20 16:04.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.46 [info     ] FQE_20220420160400: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001603767096278179, 'time_algorithm_update': 0.005070946302758642, 'loss': 0.02169301653917632, 'time_step': 0.005302337278802711, 'init_value': -4.298898696899414, 'ave_value': -3.08777990835207, 'soft_opc': nan} step=8134




2022-04-20 16:04.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:04.47 [info     ] FQE_20220420160400: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016504454325480633, 'time_algorithm_update': 0.0050592939537691785, 'loss': 0.019816127598723286, 'time_step': 0.005300371043653373, 'init_value': -4.341730117797852, 'ave_value': -3.096216742466162, 'soft_opc': nan} step=8300




2022-04-20 16:04.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160400/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:04.48 [info     ] Directory is created at d3rlpy_logs/FQE_20220420160448
2022-04-20 16:04.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:04.48 [debug    ] Building models...
2022-04-20 16:04.48 [debug    ] Models have been built.
2022-04-20 16:04.48 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420160448/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:04.50 [info     ] FQE_20220420160448: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016686320304870605, 'time_algorithm_update': 0.005014744608901268, 'loss': 0.03233492645424206, 'time_step': 0.005259310783341874, 'init_value': -1.4951285123825073, 'ave_value': -1.4722789835553984, 'soft_opc': nan} step=344




2022-04-20 16:04.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:04.52 [info     ] FQE_20220420160448: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017041314479916594, 'time_algorithm_update': 0.005161685999049697, 'loss': 0.026788015958181647, 'time_step': 0.005407935658166575, 'init_value': -1.7313194274902344, 'ave_value': -1.7469559487525945, 'soft_opc': nan} step=688




2022-04-20 16:04.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:04.54 [info     ] FQE_20220420160448: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016808994980745538, 'time_algorithm_update': 0.004636916310288185, 'loss': 0.03222461133047418, 'time_step': 0.0048793408759804655, 'init_value': -2.099351167678833, 'ave_value': -2.1877527193913053, 'soft_opc': nan} step=1032




2022-04-20 16:04.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:04.56 [info     ] FQE_20220420160448: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017211534256158874, 'time_algorithm_update': 0.005123873089635095, 'loss': 0.03869345230808438, 'time_step': 0.005374416362407596, 'init_value': -2.2004776000976562, 'ave_value': -2.3722676652055736, 'soft_opc': nan} step=1376




2022-04-20 16:04.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:04.58 [info     ] FQE_20220420160448: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001683165860730548, 'time_algorithm_update': 0.005134097365445869, 'loss': 0.04855486880283976, 'time_step': 0.005379405132559843, 'init_value': -2.4580609798431396, 'ave_value': -2.6757987815500783, 'soft_opc': nan} step=1720




2022-04-20 16:04.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.00 [info     ] FQE_20220420160448: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001726899036141329, 'time_algorithm_update': 0.005049431046774221, 'loss': 0.05775436936325372, 'time_step': 0.005296628835589387, 'init_value': -2.531193971633911, 'ave_value': -2.777080277501614, 'soft_opc': nan} step=2064




2022-04-20 16:05.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.01 [info     ] FQE_20220420160448: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017060096873793492, 'time_algorithm_update': 0.004690362963565561, 'loss': 0.07730552163733126, 'time_step': 0.004938396603562111, 'init_value': -2.868955135345459, 'ave_value': -3.1375150446813587, 'soft_opc': nan} step=2408




2022-04-20 16:05.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.03 [info     ] FQE_20220420160448: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017164820848509322, 'time_algorithm_update': 0.005132111006004866, 'loss': 0.09716556063138468, 'time_step': 0.005379849395086599, 'init_value': -3.0029408931732178, 'ave_value': -3.2834713639603375, 'soft_opc': nan} step=2752




2022-04-20 16:05.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.05 [info     ] FQE_20220420160448: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017003888307615768, 'time_algorithm_update': 0.005102914433146632, 'loss': 0.11973282688860457, 'time_step': 0.005348802999008534, 'init_value': -3.1849191188812256, 'ave_value': -3.409967677728147, 'soft_opc': nan} step=3096




2022-04-20 16:05.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.07 [info     ] FQE_20220420160448: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017014492389767668, 'time_algorithm_update': 0.005101501248603643, 'loss': 0.1503012869723661, 'time_step': 0.005345218403394832, 'init_value': -3.160867214202881, 'ave_value': -3.333319532615762, 'soft_opc': nan} step=3440




2022-04-20 16:05.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.09 [info     ] FQE_20220420160448: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.000172958817592887, 'time_algorithm_update': 0.005142008842423905, 'loss': 0.1816283395178183, 'time_step': 0.00539139051770055, 'init_value': -3.1806437969207764, 'ave_value': -3.3690477285634826, 'soft_opc': nan} step=3784




2022-04-20 16:05.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.11 [info     ] FQE_20220420160448: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016259939171547113, 'time_algorithm_update': 0.004681012658185737, 'loss': 0.2177637830961409, 'time_step': 0.004918256471323413, 'init_value': -3.198479652404785, 'ave_value': -3.4501519640232226, 'soft_opc': nan} step=4128




2022-04-20 16:05.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.13 [info     ] FQE_20220420160448: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015706447667853776, 'time_algorithm_update': 0.0050550241803014005, 'loss': 0.2612195884873874, 'time_step': 0.005277894957121028, 'init_value': -3.113058567047119, 'ave_value': -3.432174165799442, 'soft_opc': nan} step=4472




2022-04-20 16:05.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.15 [info     ] FQE_20220420160448: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001505932142568189, 'time_algorithm_update': 0.004931501870931581, 'loss': 0.3115246759424376, 'time_step': 0.005147566629010577, 'init_value': -2.807586908340454, 'ave_value': -3.111256808289201, 'soft_opc': nan} step=4816




2022-04-20 16:05.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.17 [info     ] FQE_20220420160448: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016418931096099144, 'time_algorithm_update': 0.005025788795116336, 'loss': 0.35410783548167973, 'time_step': 0.005261736553768779, 'init_value': -2.6151485443115234, 'ave_value': -2.823276389450221, 'soft_opc': nan} step=5160




2022-04-20 16:05.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.19 [info     ] FQE_20220420160448: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001689763956291731, 'time_algorithm_update': 0.004541515610938848, 'loss': 0.41724065202829796, 'time_step': 0.004782363425853641, 'init_value': -2.774275302886963, 'ave_value': -3.054475211016498, 'soft_opc': nan} step=5504




2022-04-20 16:05.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.21 [info     ] FQE_20220420160448: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016932224118432334, 'time_algorithm_update': 0.005115220713060956, 'loss': 0.4606314974858664, 'time_step': 0.0053605735301971436, 'init_value': -2.5192532539367676, 'ave_value': -2.8031966084647957, 'soft_opc': nan} step=5848




2022-04-20 16:05.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.23 [info     ] FQE_20220420160448: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017214583796124126, 'time_algorithm_update': 0.005090831324111584, 'loss': 0.5111113429870889, 'time_step': 0.0053418971771417665, 'init_value': -2.2578063011169434, 'ave_value': -2.5306048208344225, 'soft_opc': nan} step=6192




2022-04-20 16:05.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.25 [info     ] FQE_20220420160448: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016968679982562397, 'time_algorithm_update': 0.0050827819247578464, 'loss': 0.5678329343344409, 'time_step': 0.005328655935997187, 'init_value': -2.1461992263793945, 'ave_value': -2.402722419873887, 'soft_opc': nan} step=6536




2022-04-20 16:05.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.27 [info     ] FQE_20220420160448: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016956828361333801, 'time_algorithm_update': 0.004789019046827804, 'loss': 0.6081069818979432, 'time_step': 0.005033472249674243, 'init_value': -1.8181527853012085, 'ave_value': -2.1026347592791863, 'soft_opc': nan} step=6880




2022-04-20 16:05.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.29 [info     ] FQE_20220420160448: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017073819803637127, 'time_algorithm_update': 0.0049941685310629915, 'loss': 0.6699212028666638, 'time_step': 0.00523943540661834, 'init_value': -2.239452838897705, 'ave_value': -2.4617916320207285, 'soft_opc': nan} step=7224




2022-04-20 16:05.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.31 [info     ] FQE_20220420160448: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017192821170008458, 'time_algorithm_update': 0.005100314700326254, 'loss': 0.7060479921137177, 'time_step': 0.005348577055820199, 'init_value': -2.0718445777893066, 'ave_value': -1.9855539242124505, 'soft_opc': nan} step=7568




2022-04-20 16:05.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.33 [info     ] FQE_20220420160448: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017185821089633676, 'time_algorithm_update': 0.005135018465130828, 'loss': 0.7436617982335562, 'time_step': 0.005381240401157113, 'init_value': -2.506075382232666, 'ave_value': -2.0650576234778373, 'soft_opc': nan} step=7912




2022-04-20 16:05.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.35 [info     ] FQE_20220420160448: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017289782679358193, 'time_algorithm_update': 0.005107600328534148, 'loss': 0.7654800360567521, 'time_step': 0.005356705465982127, 'init_value': -2.6183369159698486, 'ave_value': -2.018008744115899, 'soft_opc': nan} step=8256




2022-04-20 16:05.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.36 [info     ] FQE_20220420160448: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016806777133498082, 'time_algorithm_update': 0.004622023466021516, 'loss': 0.7985184982765553, 'time_step': 0.0048656089361323865, 'init_value': -2.980747938156128, 'ave_value': -2.194114045977492, 'soft_opc': nan} step=8600




2022-04-20 16:05.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.38 [info     ] FQE_20220420160448: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017187137936436854, 'time_algorithm_update': 0.005026042461395264, 'loss': 0.8119303498763678, 'time_step': 0.005273963129797647, 'init_value': -3.2021737098693848, 'ave_value': -2.340038198977709, 'soft_opc': nan} step=8944




2022-04-20 16:05.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.40 [info     ] FQE_20220420160448: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017115196516347486, 'time_algorithm_update': 0.004998513432436211, 'loss': 0.8242035898011785, 'time_step': 0.0052454520103543305, 'init_value': -3.301703929901123, 'ave_value': -2.3754207175636144, 'soft_opc': nan} step=9288




2022-04-20 16:05.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.42 [info     ] FQE_20220420160448: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017250623813895293, 'time_algorithm_update': 0.005112106023832809, 'loss': 0.8262078812844011, 'time_step': 0.00536333960156108, 'init_value': -3.5248234272003174, 'ave_value': -2.3856137780153803, 'soft_opc': nan} step=9632




2022-04-20 16:05.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.44 [info     ] FQE_20220420160448: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017303436301475348, 'time_algorithm_update': 0.004719011312307313, 'loss': 0.8499900107488556, 'time_step': 0.004966446826624316, 'init_value': -4.023735523223877, 'ave_value': -2.6638405504569342, 'soft_opc': nan} step=9976




2022-04-20 16:05.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.46 [info     ] FQE_20220420160448: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017276267672694005, 'time_algorithm_update': 0.005174964666366577, 'loss': 0.87588764348089, 'time_step': 0.005422637213108151, 'init_value': -4.165437698364258, 'ave_value': -2.5306359341451983, 'soft_opc': nan} step=10320




2022-04-20 16:05.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.48 [info     ] FQE_20220420160448: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017457507377447083, 'time_algorithm_update': 0.005082047262857127, 'loss': 0.901794180024935, 'time_step': 0.005334147880243701, 'init_value': -4.677342891693115, 'ave_value': -2.9361217409473013, 'soft_opc': nan} step=10664




2022-04-20 16:05.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.50 [info     ] FQE_20220420160448: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001700596753941026, 'time_algorithm_update': 0.00501308330269747, 'loss': 0.9253971478576923, 'time_step': 0.005259025235508763, 'init_value': -5.010210037231445, 'ave_value': -3.0403290471448017, 'soft_opc': nan} step=11008




2022-04-20 16:05.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.52 [info     ] FQE_20220420160448: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017111939053202784, 'time_algorithm_update': 0.0050478917221690334, 'loss': 0.9385284489713782, 'time_step': 0.005295159511787947, 'init_value': -5.749818801879883, 'ave_value': -3.3629372579683254, 'soft_opc': nan} step=11352




2022-04-20 16:05.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.54 [info     ] FQE_20220420160448: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016808232595754224, 'time_algorithm_update': 0.004683458527853322, 'loss': 0.9403862857129858, 'time_step': 0.004927515983581543, 'init_value': -5.747276306152344, 'ave_value': -3.276256914087781, 'soft_opc': nan} step=11696




2022-04-20 16:05.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.56 [info     ] FQE_20220420160448: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001742798228596532, 'time_algorithm_update': 0.005086771970571473, 'loss': 0.9518262114657392, 'time_step': 0.005337312471034915, 'init_value': -6.301811218261719, 'ave_value': -3.5376253668084607, 'soft_opc': nan} step=12040




2022-04-20 16:05.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:05.58 [info     ] FQE_20220420160448: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017011165618896484, 'time_algorithm_update': 0.005047586768172508, 'loss': 0.9687074041427222, 'time_step': 0.00529265888901644, 'init_value': -6.8628034591674805, 'ave_value': -3.671281863598841, 'soft_opc': nan} step=12384




2022-04-20 16:05.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.00 [info     ] FQE_20220420160448: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017336288163828295, 'time_algorithm_update': 0.005140750907188238, 'loss': 0.9619333632290363, 'time_step': 0.00539073763891708, 'init_value': -6.8258867263793945, 'ave_value': -3.522666360295235, 'soft_opc': nan} step=12728




2022-04-20 16:06.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.02 [info     ] FQE_20220420160448: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016887312711671342, 'time_algorithm_update': 0.004808687886526418, 'loss': 0.9814930658592561, 'time_step': 0.0050522262273832805, 'init_value': -7.506016731262207, 'ave_value': -3.8920029937835023, 'soft_opc': nan} step=13072




2022-04-20 16:06.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.04 [info     ] FQE_20220420160448: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017157543537228606, 'time_algorithm_update': 0.005115091107612432, 'loss': 0.9761925414096304, 'time_step': 0.005364106145015982, 'init_value': -8.120813369750977, 'ave_value': -4.312218184603865, 'soft_opc': nan} step=13416




2022-04-20 16:06.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.06 [info     ] FQE_20220420160448: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001721860364426014, 'time_algorithm_update': 0.005080722099126771, 'loss': 0.9876216654340888, 'time_step': 0.005328976137693538, 'init_value': -8.90131664276123, 'ave_value': -4.909022353392363, 'soft_opc': nan} step=13760




2022-04-20 16:06.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.08 [info     ] FQE_20220420160448: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001687878786131393, 'time_algorithm_update': 0.005081112301626871, 'loss': 0.9590290607451353, 'time_step': 0.005324254202288251, 'init_value': -9.689701080322266, 'ave_value': -5.642443527014473, 'soft_opc': nan} step=14104




2022-04-20 16:06.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.10 [info     ] FQE_20220420160448: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016968818598015365, 'time_algorithm_update': 0.005098747652630473, 'loss': 0.9536311236361779, 'time_step': 0.005339319622793863, 'init_value': -10.150104522705078, 'ave_value': -5.860373870018232, 'soft_opc': nan} step=14448




2022-04-20 16:06.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.12 [info     ] FQE_20220420160448: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017277861750403115, 'time_algorithm_update': 0.004693774982940319, 'loss': 0.9471090993692362, 'time_step': 0.004941733077515003, 'init_value': -10.720272064208984, 'ave_value': -6.3145410066017424, 'soft_opc': nan} step=14792




2022-04-20 16:06.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.14 [info     ] FQE_20220420160448: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017489111700723337, 'time_algorithm_update': 0.005086952170660329, 'loss': 0.9604813513389334, 'time_step': 0.005337654851203741, 'init_value': -11.044780731201172, 'ave_value': -6.530487945971188, 'soft_opc': nan} step=15136




2022-04-20 16:06.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.16 [info     ] FQE_20220420160448: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017073750495910645, 'time_algorithm_update': 0.0051024313582930456, 'loss': 0.9591305936730012, 'time_step': 0.005347743283870609, 'init_value': -11.268869400024414, 'ave_value': -6.704223911725152, 'soft_opc': nan} step=15480




2022-04-20 16:06.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.18 [info     ] FQE_20220420160448: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001767118309819421, 'time_algorithm_update': 0.0050698484099188515, 'loss': 0.9497603406956376, 'time_step': 0.005323290131812872, 'init_value': -12.199644088745117, 'ave_value': -7.421764429871764, 'soft_opc': nan} step=15824




2022-04-20 16:06.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.19 [info     ] FQE_20220420160448: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.000171587910763053, 'time_algorithm_update': 0.0046831022861392, 'loss': 0.9465236699555156, 'time_step': 0.0049299978932669, 'init_value': -12.824457168579102, 'ave_value': -7.73002781724507, 'soft_opc': nan} step=16168




2022-04-20 16:06.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.21 [info     ] FQE_20220420160448: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017257069432458213, 'time_algorithm_update': 0.005075588475826175, 'loss': 0.9617984429716544, 'time_step': 0.00532380924668423, 'init_value': -13.522308349609375, 'ave_value': -8.322475465440803, 'soft_opc': nan} step=16512




2022-04-20 16:06.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.23 [info     ] FQE_20220420160448: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017272109209105026, 'time_algorithm_update': 0.005112334046252939, 'loss': 0.9580022687265692, 'time_step': 0.005362760882044948, 'init_value': -13.862627029418945, 'ave_value': -8.603698486831409, 'soft_opc': nan} step=16856




2022-04-20 16:06.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:06.25 [info     ] FQE_20220420160448: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001740587312121724, 'time_algorithm_update': 0.005124458739923877, 'loss': 0.9316615046565097, 'time_step': 0.005375574494517127, 'init_value': -14.175287246704102, 'ave_value': -9.033898984904292, 'soft_opc': nan} step=17200




2022-04-20 16:06.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160448/model_17200.pt
most optimal hyper params for td3+bc at this point:  [0.0012009373312255548, 0.007673227118548031, 7.942796560357695e-05, 1]
search iteration:  8
using hyper params:  [0.0046790174895716425, 0.004634079686493763, 4.7447679580785114e-05, 1]
2022-04-20 16:06.25 [debug    ] RoundIterator is selected.
2022-04-20 16:06.25 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420160625
2022-04-20 16:06.25 [debug    ] Fitting scaler...              scaler=standard


  mean = torch.tensor(self._mean, dtype=torch.float32, device=x.device)
  std = torch.tensor(self._std, dtype=torch.float32, device=x.device)
  minimum = torch.tensor(
  maximum = torch.tensor(


2022-04-20 16:06.26 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:06.26 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:06.26 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0046790174895716425, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha': 2.5, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.004634079686493763, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.29 [info     ] TD3PlusBC_20220420160625: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.000325115800601, 'time_algorithm_update': 0.008390789840653626, 'critic_loss': 0.40133248408984024, 'actor_loss': 0.021089689207007312, 'time_step': 0.008795015993174057, 'td_error': 0.8067623900836577, 'init_value': -0.4856705665588379, 'ave_value': 0.16463203247472946} step=342
2022-04-20 16:06.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.33 [info     ] TD3PlusBC_20220420160625: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003290713181969715, 'time_algorithm_update': 0.008987837367587619, 'critic_loss': 0.18694474582957943, 'actor_loss': -0.024662761638561886, 'time_step': 0.009395768767908999, 'td_error': 0.8051552122938224, 'init_value': -0.7358222007751465, 'ave_value': 0.23022253900235687} step=684
2022-04-20 16:06.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.37 [info     ] TD3PlusBC_20220420160625: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033037076916610984, 'time_algorithm_update': 0.008901521476388674, 'critic_loss': 0.2358456050094805, 'actor_loss': -0.021219208596916925, 'time_step': 0.009311590975488138, 'td_error': 0.8025016514466419, 'init_value': -1.0204529762268066, 'ave_value': 0.28312960804314224} step=1026
2022-04-20 16:06.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.40 [info     ] TD3PlusBC_20220420160625: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00032514508007562646, 'time_algorithm_update': 0.00863970929419088, 'critic_loss': 0.28241911486924043, 'actor_loss': 0.002298576833560453, 'time_step': 0.009047368813676444, 'td_error': 0.8034810249969578, 'init_value': -1.386818766593933, 'ave_value': 0.30502693888538324} step=1368
2022-04-20 16:06.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.44 [info     ] TD3PlusBC_20220420160625: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00033247122290538765, 'time_algorithm_update': 0.008988362306739852, 'critic_loss': 0.34897270949001896, 'actor_loss': 0.015670306784541982, 'time_step': 0.00940272891730593, 'td_error': 0.8033443175047147, 'init_value': -1.570643424987793, 'ave_value': 0.40282490902316026} step=1710
2022-04-20 16:06.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.47 [info     ] TD3PlusBC_20220420160625: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003292476921750788, 'time_algorithm_update': 0.008368466332641959, 'critic_loss': 0.4192351421112554, 'actor_loss': 0.01716477177732172, 'time_step': 0.008776903849596169, 'td_error': 0.809879442637028, 'init_value': -1.9098892211914062, 'ave_value': 0.4640034397848806} step=2052
2022-04-20 16:06.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.51 [info     ] TD3PlusBC_20220420160625: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032969803838004845, 'time_algorithm_update': 0.008949923236467684, 'critic_loss': 0.48486026971225155, 'actor_loss': 0.02264776292164423, 'time_step': 0.009359425271463674, 'td_error': 0.8182310556308332, 'init_value': -2.2084174156188965, 'ave_value': 0.5315314884789221} step=2394
2022-04-20 16:06.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.55 [info     ] TD3PlusBC_20220420160625: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003283497882865326, 'time_algorithm_update': 0.008829497454459207, 'critic_loss': 0.5848111258438456, 'actor_loss': 0.031032904677572307, 'time_step': 0.009237051010131836, 'td_error': 0.8323742424110004, 'init_value': -2.4514145851135254, 'ave_value': 0.619707940153249} step=2736
2022-04-20 16:06.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.58 [info     ] TD3PlusBC_20220420160625: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003274128450984843, 'time_algorithm_update': 0.008512186725237215, 'critic_loss': 0.634324567795497, 'actor_loss': 0.048107504103964534, 'time_step': 0.008920776216607345, 'td_error': 0.8438715211341918, 'init_value': -2.757840394973755, 'ave_value': 0.6533583807727523} step=3078
2022-04-20 16:06.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.02 [info     ] TD3PlusBC_20220420160625: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003277537418387787, 'time_algorithm_update': 0.009026425623754311, 'critic_loss': 0.6974446412328391, 'actor_loss': 0.03506354793731929, 'time_step': 0.009434913334093596, 'td_error': 0.8586145881340397, 'init_value': -3.1630167961120605, 'ave_value': 0.6848571240468169} step=3420
2022-04-20 16:07.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.06 [info     ] TD3PlusBC_20220420160625: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00033146526381286264, 'time_algorithm_update': 0.008475818132099352, 'critic_loss': 0.7942707349508129, 'actor_loss': 0.06725415938168938, 'time_step': 0.008887700867234614, 'td_error': 0.8844320616889901, 'init_value': -3.4884254932403564, 'ave_value': 0.7100081874790962} step=3762
2022-04-20 16:07.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.10 [info     ] TD3PlusBC_20220420160625: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00033454518569143196, 'time_algorithm_update': 0.009017090351260894, 'critic_loss': 0.9220081615256287, 'actor_loss': 0.04552469531085059, 'time_step': 0.009430052244175247, 'td_error': 0.9053258970919816, 'init_value': -3.768303632736206, 'ave_value': 0.7764291652564698} step=4104
2022-04-20 16:07.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.13 [info     ] TD3PlusBC_20220420160625: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003307228199919762, 'time_algorithm_update': 0.008898171067934984, 'critic_loss': 0.9981369702962407, 'actor_loss': 0.0526075298238916, 'time_step': 0.009308139483133951, 'td_error': 0.9263697535689426, 'init_value': -4.038064479827881, 'ave_value': 0.8725642174207563} step=4446
2022-04-20 16:07.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.17 [info     ] TD3PlusBC_20220420160625: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003301239850228293, 'time_algorithm_update': 0.008563624487982856, 'critic_loss': 1.0954649305047348, 'actor_loss': 0.04560236541325586, 'time_step': 0.008975443087126079, 'td_error': 0.9512876439699274, 'init_value': -4.377661228179932, 'ave_value': 0.962234164344709} step=4788
2022-04-20 16:07.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.21 [info     ] TD3PlusBC_20220420160625: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00032617683299103673, 'time_algorithm_update': 0.008915719930191485, 'critic_loss': 1.20545665196508, 'actor_loss': 0.04850869954765191, 'time_step': 0.009320800067388524, 'td_error': 0.9753344679604512, 'init_value': -4.6445112228393555, 'ave_value': 0.9872750776811074} step=5130
2022-04-20 16:07.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.24 [info     ] TD3PlusBC_20220420160625: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003336626186705472, 'time_algorithm_update': 0.008699191941155327, 'critic_loss': 1.361197376198936, 'actor_loss': 0.05588455465540551, 'time_step': 0.009112452205858733, 'td_error': 0.9860390786924352, 'init_value': -5.1705427169799805, 'ave_value': 1.011168040590495} step=5472
2022-04-20 16:07.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.28 [info     ] TD3PlusBC_20220420160625: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00033038052899098537, 'time_algorithm_update': 0.009012216712996277, 'critic_loss': 1.484438408623662, 'actor_loss': 0.06596089163196017, 'time_step': 0.00942148939210769, 'td_error': 1.0171919871308526, 'init_value': -5.570051670074463, 'ave_value': 1.0130632784662281} step=5814
2022-04-20 16:07.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.32 [info     ] TD3PlusBC_20220420160625: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003268279527363024, 'time_algorithm_update': 0.008878380931608858, 'critic_loss': 1.6884612613602688, 'actor_loss': 0.060278544779758005, 'time_step': 0.009286728518748144, 'td_error': 1.063157697212113, 'init_value': -5.669558048248291, 'ave_value': 1.1541280557013007} step=6156
2022-04-20 16:07.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.35 [info     ] TD3PlusBC_20220420160625: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003302104291860123, 'time_algorithm_update': 0.008572149695011607, 'critic_loss': 1.8019499962615688, 'actor_loss': 0.05958865362911196, 'time_step': 0.008985087188363772, 'td_error': 1.0907711141426821, 'init_value': -5.896182537078857, 'ave_value': 1.227937334740308} step=6498
2022-04-20 16:07.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.39 [info     ] TD3PlusBC_20220420160625: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003317545729073865, 'time_algorithm_update': 0.008967176515456528, 'critic_loss': 1.9475269477966932, 'actor_loss': 0.07340323380385226, 'time_step': 0.009378702319853487, 'td_error': 1.1094848283770709, 'init_value': -6.426177978515625, 'ave_value': 1.2290363407533968} step=6840
2022-04-20 16:07.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.43 [info     ] TD3PlusBC_20220420160625: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00032747279830843383, 'time_algorithm_update': 0.008897166503103156, 'critic_loss': 2.1002934306685686, 'actor_loss': 0.05939247003859944, 'time_step': 0.009307826471607588, 'td_error': 1.1534368354585889, 'init_value': -6.796106815338135, 'ave_value': 1.2250114217948018} step=7182
2022-04-20 16:07.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.46 [info     ] TD3PlusBC_20220420160625: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003272985157213713, 'time_algorithm_update': 0.008998950322469076, 'critic_loss': 2.281408359955626, 'actor_loss': 0.07454088674476969, 'time_step': 0.009407364136991446, 'td_error': 1.185410471701297, 'init_value': -7.245485782623291, 'ave_value': 1.2286734300030413} step=7524
2022-04-20 16:07.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.50 [info     ] TD3PlusBC_20220420160625: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032929230851736684, 'time_algorithm_update': 0.009071701451351768, 'critic_loss': 2.52443664324911, 'actor_loss': 0.06107037314023191, 'time_step': 0.00948005043275175, 'td_error': 1.213094373736803, 'init_value': -7.5212883949279785, 'ave_value': 1.321550075518605} step=7866
2022-04-20 16:07.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.54 [info     ] TD3PlusBC_20220420160625: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003309793639601323, 'time_algorithm_update': 0.008518534794188383, 'critic_loss': 2.6621352193648353, 'actor_loss': 0.07098737814360195, 'time_step': 0.008929713427671912, 'td_error': 1.264041698016835, 'init_value': -7.723736763000488, 'ave_value': 1.4413980810664309} step=8208
2022-04-20 16:07.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.57 [info     ] TD3PlusBC_20220420160625: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003296122913472137, 'time_algorithm_update': 0.008977818907352915, 'critic_loss': 2.880984621612649, 'actor_loss': 0.06568382386314242, 'time_step': 0.009387264474790696, 'td_error': 1.2743743989066623, 'init_value': -8.246005058288574, 'ave_value': 1.4639674004706815} step=8550
2022-04-20 16:07.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.01 [info     ] TD3PlusBC_20220420160625: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032841322714822335, 'time_algorithm_update': 0.008906550574720952, 'critic_loss': 3.0891813278546807, 'actor_loss': 0.0679045014126956, 'time_step': 0.009316349587245294, 'td_error': 1.3213900107998007, 'init_value': -8.486681938171387, 'ave_value': 1.509172993571464} step=8892
2022-04-20 16:08.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.05 [info     ] TD3PlusBC_20220420160625: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003282779838606628, 'time_algorithm_update': 0.008528872540122584, 'critic_loss': 3.241407276419868, 'actor_loss': 0.07322283673007586, 'time_step': 0.008936538333781281, 'td_error': 1.4080286457528983, 'init_value': -8.450271606445312, 'ave_value': 1.6319943747675332} step=9234
2022-04-20 16:08.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.08 [info     ] TD3PlusBC_20220420160625: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003340425547103436, 'time_algorithm_update': 0.008985874248526947, 'critic_loss': 3.550082783252872, 'actor_loss': 0.08140835041801135, 'time_step': 0.00940355362250791, 'td_error': 1.3979206684392358, 'init_value': -9.286554336547852, 'ave_value': 1.5248151737577407} step=9576
2022-04-20 16:08.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.12 [info     ] TD3PlusBC_20220420160625: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003023670430769, 'time_algorithm_update': 0.008159425523546007, 'critic_loss': 3.681846081426269, 'actor_loss': 0.0795331288848007, 'time_step': 0.00853410101773446, 'td_error': 1.449874173189054, 'init_value': -9.506802558898926, 'ave_value': 1.6304094769180173} step=9918
2022-04-20 16:08.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.15 [info     ] TD3PlusBC_20220420160625: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00029899850923415514, 'time_algorithm_update': 0.008486997314363892, 'critic_loss': 3.9455340105539176, 'actor_loss': 0.08054008553817607, 'time_step': 0.008856439451028032, 'td_error': 1.493556790765207, 'init_value': -9.958795547485352, 'ave_value': 1.5972442374464024} step=10260
2022-04-20 16:08.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.19 [info     ] TD3PlusBC_20220420160625: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003323973270884731, 'time_algorithm_update': 0.009004121635392395, 'critic_loss': 4.246584014934406, 'actor_loss': 0.08062017796158094, 'time_step': 0.009418373219451012, 'td_error': 1.5232249338717343, 'init_value': -10.110798835754395, 'ave_value': 1.6964995867790873} step=10602
2022-04-20 16:08.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.23 [info     ] TD3PlusBC_20220420160625: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003264598679124263, 'time_algorithm_update': 0.008517602730912772, 'critic_loss': 4.396567278089579, 'actor_loss': 0.06988896272684399, 'time_step': 0.008923932822824221, 'td_error': 1.5711213364678491, 'init_value': -10.520326614379883, 'ave_value': 1.8155983726157563} step=10944
2022-04-20 16:08.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.26 [info     ] TD3PlusBC_20220420160625: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003331815987302546, 'time_algorithm_update': 0.008859748031660827, 'critic_loss': 4.640723750256655, 'actor_loss': 0.06906835412421422, 'time_step': 0.009275139423838834, 'td_error': 1.6288211817068896, 'init_value': -10.691146850585938, 'ave_value': 1.8199589872769257} step=11286
2022-04-20 16:08.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.30 [info     ] TD3PlusBC_20220420160625: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003283546681989703, 'time_algorithm_update': 0.008617049769351357, 'critic_loss': 4.986313468880123, 'actor_loss': 0.07792492809961414, 'time_step': 0.00902611609787969, 'td_error': 1.6303542084971585, 'init_value': -11.428049087524414, 'ave_value': 1.8140758312881193} step=11628
2022-04-20 16:08.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.34 [info     ] TD3PlusBC_20220420160625: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003297077982049239, 'time_algorithm_update': 0.008957763861494454, 'critic_loss': 5.175003906788184, 'actor_loss': 0.06882243725465752, 'time_step': 0.009367028872172037, 'td_error': 1.6865419347096011, 'init_value': -11.871123313903809, 'ave_value': 1.8480918201220196} step=11970
2022-04-20 16:08.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.37 [info     ] TD3PlusBC_20220420160625: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003270782225313242, 'time_algorithm_update': 0.008928887328209236, 'critic_loss': 5.5258422835528505, 'actor_loss': 0.08211444433756739, 'time_step': 0.009335655915109735, 'td_error': 1.7374960771066774, 'init_value': -12.234676361083984, 'ave_value': 1.9382201506176477} step=12312
2022-04-20 16:08.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.41 [info     ] TD3PlusBC_20220420160625: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00033156495345266243, 'time_algorithm_update': 0.008523902000739561, 'critic_loss': 5.833735978394224, 'actor_loss': 0.08107234223892815, 'time_step': 0.008935995966370344, 'td_error': 1.809454731556501, 'init_value': -12.37571907043457, 'ave_value': 1.90236790835161} step=12654
2022-04-20 16:08.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.45 [info     ] TD3PlusBC_20220420160625: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003293947866785596, 'time_algorithm_update': 0.0087518120369716, 'critic_loss': 6.080106709435669, 'actor_loss': 0.06999661806121207, 'time_step': 0.009162566118073045, 'td_error': 1.8420164691163554, 'init_value': -12.838513374328613, 'ave_value': 1.9835831415942693} step=12996
2022-04-20 16:08.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.48 [info     ] TD3PlusBC_20220420160625: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003300019872118855, 'time_algorithm_update': 0.008475797915319253, 'critic_loss': 6.333150894669761, 'actor_loss': 0.09485176889694225, 'time_step': 0.008884646739178931, 'td_error': 1.893308535357875, 'init_value': -13.102066040039062, 'ave_value': 2.03382728624187} step=13338
2022-04-20 16:08.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.52 [info     ] TD3PlusBC_20220420160625: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032583245059900117, 'time_algorithm_update': 0.00887205377656814, 'critic_loss': 6.720646081263559, 'actor_loss': 0.08056293112056995, 'time_step': 0.009277716714736314, 'td_error': 1.956609451270439, 'init_value': -13.6611328125, 'ave_value': 2.027695523942695} step=13680
2022-04-20 16:08.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.56 [info     ] TD3PlusBC_20220420160625: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00032938084407159457, 'time_algorithm_update': 0.008935260493852938, 'critic_loss': 6.858843520022275, 'actor_loss': 0.09665579982755478, 'time_step': 0.009347247798540438, 'td_error': 2.004730556963923, 'init_value': -13.995477676391602, 'ave_value': 2.041533648947103} step=14022
2022-04-20 16:08.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.59 [info     ] TD3PlusBC_20220420160625: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003252594094527395, 'time_algorithm_update': 0.007145446643494723, 'critic_loss': 7.254805081420475, 'actor_loss': 0.08087193142426641, 'time_step': 0.007551248310602199, 'td_error': 2.050426925946792, 'init_value': -14.518524169921875, 'ave_value': 2.1122883325348702} step=14364
2022-04-20 16:08.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.01 [info     ] TD3PlusBC_20220420160625: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032919122461687057, 'time_algorithm_update': 0.006930680302848593, 'critic_loss': 7.4634221419256335, 'actor_loss': 0.09153843024059345, 'time_step': 0.007338826419317234, 'td_error': 2.1389592842666896, 'init_value': -14.680493354797363, 'ave_value': 2.130312775038938} step=14706
2022-04-20 16:09.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.04 [info     ] TD3PlusBC_20220420160625: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032596978527760646, 'time_algorithm_update': 0.0068849834085207935, 'critic_loss': 7.785307071013757, 'actor_loss': 0.10139052201694215, 'time_step': 0.00729049785792479, 'td_error': 2.1734621642159886, 'init_value': -15.244958877563477, 'ave_value': 2.0982332052196466} step=15048
2022-04-20 16:09.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.07 [info     ] TD3PlusBC_20220420160625: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003268056445651584, 'time_algorithm_update': 0.006934839382506253, 'critic_loss': 8.118595854232186, 'actor_loss': 0.07605955865212351, 'time_step': 0.007338436723452563, 'td_error': 2.2523451731949153, 'init_value': -15.159808158874512, 'ave_value': 2.1865220520325708} step=15390
2022-04-20 16:09.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.10 [info     ] TD3PlusBC_20220420160625: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00032888867004572995, 'time_algorithm_update': 0.006898901615923608, 'critic_loss': 8.436258739198161, 'actor_loss': 0.08402537521824502, 'time_step': 0.007309939429076791, 'td_error': 2.3012625773227198, 'init_value': -15.697664260864258, 'ave_value': 2.268026819244564} step=15732
2022-04-20 16:09.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.13 [info     ] TD3PlusBC_20220420160625: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003253563105711463, 'time_algorithm_update': 0.00688415591479742, 'critic_loss': 8.500598236482743, 'actor_loss': 0.09369145257518305, 'time_step': 0.007287776261045222, 'td_error': 2.3666016934367597, 'init_value': -16.1997127532959, 'ave_value': 2.2070126654524023} step=16074
2022-04-20 16:09.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.16 [info     ] TD3PlusBC_20220420160625: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033217912528947085, 'time_algorithm_update': 0.006974936925876907, 'critic_loss': 8.994481895402162, 'actor_loss': 0.09246694723590773, 'time_step': 0.0073851528223495035, 'td_error': 2.431703683207461, 'init_value': -16.41042709350586, 'ave_value': 2.354803610481741} step=16416
2022-04-20 16:09.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.19 [info     ] TD3PlusBC_20220420160625: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003237724304199219, 'time_algorithm_update': 0.006966252075998406, 'critic_loss': 9.199985841910044, 'actor_loss': 0.09692222245952539, 'time_step': 0.007369673740096957, 'td_error': 2.4897045953829955, 'init_value': -16.754396438598633, 'ave_value': 2.2755159440224673} step=16758
2022-04-20 16:09.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:09.22 [info     ] TD3PlusBC_20220420160625: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032906643828453376, 'time_algorithm_update': 0.006982597691273829, 'critic_loss': 9.68190930531039, 'actor_loss': 0.0863365080665078, 'time_step': 0.007390171463726557, 'td_error': 2.5769108574111312, 'init_value': -17.09097671508789, 'ave_value': 2.281874489179133} step=17100
2022-04-20 16:09.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420160625/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:09.23 [info     ] FQE_20220420160922: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001566955842167498, 'time_algorithm_update': 0.0034632840788508036, 'loss': 0.008117120958449521, 'time_step': 0.003690124994300934, 'init_value': -0.04144562408328056, 'ave_value': 0.03322481896459788, 'soft_opc': nan} step=166




2022-04-20 16:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.23 [info     ] FQE_20220420160922: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015670132924275226, 'time_algorithm_update': 0.0034216886543365845, 'loss': 0.00630099916163024, 'time_step': 0.003649343927222562, 'init_value': -0.1759285181760788, 'ave_value': -0.030134580648495807, 'soft_opc': nan} step=332




2022-04-20 16:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.24 [info     ] FQE_20220420160922: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015847079725150602, 'time_algorithm_update': 0.0035626414310501284, 'loss': 0.005388300116903272, 'time_step': 0.0037912489419960112, 'init_value': -0.23998641967773438, 'ave_value': -0.06147489571725732, 'soft_opc': nan} step=498




2022-04-20 16:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.25 [info     ] FQE_20220420160922: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015653759600168252, 'time_algorithm_update': 0.003495846886232675, 'loss': 0.005160521917853011, 'time_step': 0.0037183704146419665, 'init_value': -0.3585379421710968, 'ave_value': -0.12900119342816038, 'soft_opc': nan} step=664




2022-04-20 16:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.25 [info     ] FQE_20220420160922: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015598320099244635, 'time_algorithm_update': 0.0034737328448927545, 'loss': 0.0047887361283043784, 'time_step': 0.00369783912796572, 'init_value': -0.45149561762809753, 'ave_value': -0.17056930472755485, 'soft_opc': nan} step=830




2022-04-20 16:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.26 [info     ] FQE_20220420160922: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015371535197798028, 'time_algorithm_update': 0.0035315148801688687, 'loss': 0.0043522861326135785, 'time_step': 0.0037538674940545873, 'init_value': -0.49490368366241455, 'ave_value': -0.1904709422491021, 'soft_opc': nan} step=996




2022-04-20 16:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.27 [info     ] FQE_20220420160922: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015628481485757483, 'time_algorithm_update': 0.0035969851964927583, 'loss': 0.004342222200560731, 'time_step': 0.0038195992090615883, 'init_value': -0.6074550747871399, 'ave_value': -0.2695755652743518, 'soft_opc': nan} step=1162




2022-04-20 16:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.28 [info     ] FQE_20220420160922: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016158747385783368, 'time_algorithm_update': 0.0035800818937370576, 'loss': 0.004151425919781652, 'time_step': 0.003813859928085143, 'init_value': -0.6559644937515259, 'ave_value': -0.29127136716988306, 'soft_opc': nan} step=1328




2022-04-20 16:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.28 [info     ] FQE_20220420160922: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001579235835247729, 'time_algorithm_update': 0.0035914828978389143, 'loss': 0.0039016463020984755, 'time_step': 0.00382227926369173, 'init_value': -0.7059406042098999, 'ave_value': -0.32298054228545175, 'soft_opc': nan} step=1494




2022-04-20 16:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.29 [info     ] FQE_20220420160922: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015706613839390767, 'time_algorithm_update': 0.0035275235233536684, 'loss': 0.003975272828752332, 'time_step': 0.003757055983485946, 'init_value': -0.8110994100570679, 'ave_value': -0.39463128553334254, 'soft_opc': nan} step=1660




2022-04-20 16:09.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.30 [info     ] FQE_20220420160922: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015711497111492846, 'time_algorithm_update': 0.0035525474203638285, 'loss': 0.003978764963005276, 'time_step': 0.003781005560633648, 'init_value': -0.8707479238510132, 'ave_value': -0.4431514165884404, 'soft_opc': nan} step=1826




2022-04-20 16:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.30 [info     ] FQE_20220420160922: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.000159318188586867, 'time_algorithm_update': 0.0036158173917287804, 'loss': 0.003992861328248204, 'time_step': 0.00384969883654491, 'init_value': -0.9434652328491211, 'ave_value': -0.5009380942546167, 'soft_opc': nan} step=1992




2022-04-20 16:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.31 [info     ] FQE_20220420160922: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016129016876220703, 'time_algorithm_update': 0.0035206639623067467, 'loss': 0.004429889031467084, 'time_step': 0.0037545640784573844, 'init_value': -0.9560900330543518, 'ave_value': -0.49515183276773705, 'soft_opc': nan} step=2158




2022-04-20 16:09.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.32 [info     ] FQE_20220420160922: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015911711267678133, 'time_algorithm_update': 0.00363828187965485, 'loss': 0.004616212721841118, 'time_step': 0.003869460289736828, 'init_value': -1.0754048824310303, 'ave_value': -0.6032022641904585, 'soft_opc': nan} step=2324




2022-04-20 16:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.33 [info     ] FQE_20220420160922: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001578359718782356, 'time_algorithm_update': 0.0035627434052616716, 'loss': 0.004784449094454814, 'time_step': 0.0037921552198478974, 'init_value': -1.1194744110107422, 'ave_value': -0.6451736277418194, 'soft_opc': nan} step=2490




2022-04-20 16:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.33 [info     ] FQE_20220420160922: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001555652503507683, 'time_algorithm_update': 0.0035412397729345114, 'loss': 0.0055232178812554805, 'time_step': 0.003766533840133483, 'init_value': -1.2341656684875488, 'ave_value': -0.7296187352657587, 'soft_opc': nan} step=2656




2022-04-20 16:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.34 [info     ] FQE_20220420160922: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015884996896766755, 'time_algorithm_update': 0.0036671391452651426, 'loss': 0.00598924895762522, 'time_step': 0.0038970622671655863, 'init_value': -1.318056583404541, 'ave_value': -0.7941524489967337, 'soft_opc': nan} step=2822




2022-04-20 16:09.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.35 [info     ] FQE_20220420160922: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016086790935102715, 'time_algorithm_update': 0.0035343715943485857, 'loss': 0.006098850910041302, 'time_step': 0.0037646035113966607, 'init_value': -1.3508106470108032, 'ave_value': -0.8267350639032015, 'soft_opc': nan} step=2988




2022-04-20 16:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.35 [info     ] FQE_20220420160922: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015773112515369094, 'time_algorithm_update': 0.00359842145299337, 'loss': 0.006826358931125085, 'time_step': 0.003827406699398914, 'init_value': -1.451838731765747, 'ave_value': -0.929675890649627, 'soft_opc': nan} step=3154




2022-04-20 16:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.36 [info     ] FQE_20220420160922: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001530374389096915, 'time_algorithm_update': 0.0026106762598796063, 'loss': 0.007461484694847815, 'time_step': 0.002834904624755124, 'init_value': -1.5402323007583618, 'ave_value': -0.9851872779617741, 'soft_opc': nan} step=3320




2022-04-20 16:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.37 [info     ] FQE_20220420160922: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001643479588520096, 'time_algorithm_update': 0.0036236162645271026, 'loss': 0.008681982571921166, 'time_step': 0.003859617623938135, 'init_value': -1.491152286529541, 'ave_value': -0.9310450421121844, 'soft_opc': nan} step=3486




2022-04-20 16:09.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.37 [info     ] FQE_20220420160922: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015965140009500893, 'time_algorithm_update': 0.003460303846612034, 'loss': 0.009172975680375674, 'time_step': 0.003695964813232422, 'init_value': -1.6289048194885254, 'ave_value': -1.0431703136680095, 'soft_opc': nan} step=3652




2022-04-20 16:09.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.38 [info     ] FQE_20220420160922: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015754010303910957, 'time_algorithm_update': 0.0035681020782654545, 'loss': 0.009695859142741839, 'time_step': 0.0037975799606507084, 'init_value': -1.7115706205368042, 'ave_value': -1.1005277262379727, 'soft_opc': nan} step=3818




2022-04-20 16:09.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.39 [info     ] FQE_20220420160922: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001606409808239305, 'time_algorithm_update': 0.003517209765422775, 'loss': 0.010727797435029936, 'time_step': 0.0037483809942222505, 'init_value': -1.7767889499664307, 'ave_value': -1.153400937336925, 'soft_opc': nan} step=3984




2022-04-20 16:09.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.39 [info     ] FQE_20220420160922: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001570704471634095, 'time_algorithm_update': 0.003600549985127277, 'loss': 0.011732167724228228, 'time_step': 0.003826348178357963, 'init_value': -1.87538480758667, 'ave_value': -1.2131541148239227, 'soft_opc': nan} step=4150




2022-04-20 16:09.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.40 [info     ] FQE_20220420160922: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016097850110157426, 'time_algorithm_update': 0.003502192267452378, 'loss': 0.012239111926131713, 'time_step': 0.0037361081824245222, 'init_value': -2.020993232727051, 'ave_value': -1.3478634982306914, 'soft_opc': nan} step=4316




2022-04-20 16:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.41 [info     ] FQE_20220420160922: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016049448266086807, 'time_algorithm_update': 0.0036211559571415544, 'loss': 0.013462740304481507, 'time_step': 0.0038572822708681405, 'init_value': -1.9980121850967407, 'ave_value': -1.334774468937335, 'soft_opc': nan} step=4482




2022-04-20 16:09.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.42 [info     ] FQE_20220420160922: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001590137022087373, 'time_algorithm_update': 0.003563234604984881, 'loss': 0.014659756086261785, 'time_step': 0.0037943009870598115, 'init_value': -2.03879976272583, 'ave_value': -1.3760776207048178, 'soft_opc': nan} step=4648




2022-04-20 16:09.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.42 [info     ] FQE_20220420160922: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015842914581298828, 'time_algorithm_update': 0.003558997648308076, 'loss': 0.01575552123593144, 'time_step': 0.003787451479808394, 'init_value': -2.1762051582336426, 'ave_value': -1.48101263150556, 'soft_opc': nan} step=4814




2022-04-20 16:09.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.43 [info     ] FQE_20220420160922: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015713507870593704, 'time_algorithm_update': 0.0034800495009824454, 'loss': 0.0163618087297283, 'time_step': 0.003709632230092244, 'init_value': -2.2417643070220947, 'ave_value': -1.5492592602733288, 'soft_opc': nan} step=4980




2022-04-20 16:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.44 [info     ] FQE_20220420160922: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001597059778420322, 'time_algorithm_update': 0.00358452423509345, 'loss': 0.01776248892328803, 'time_step': 0.0038170455450035005, 'init_value': -2.327329635620117, 'ave_value': -1.6426084759687116, 'soft_opc': nan} step=5146




2022-04-20 16:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.44 [info     ] FQE_20220420160922: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001564744007156556, 'time_algorithm_update': 0.0034867467650447985, 'loss': 0.018991087204973067, 'time_step': 0.0037144393805997916, 'init_value': -2.5074591636657715, 'ave_value': -1.7504314732846913, 'soft_opc': nan} step=5312




2022-04-20 16:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.45 [info     ] FQE_20220420160922: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016106324023511037, 'time_algorithm_update': 0.0037058879093951488, 'loss': 0.0202928650142423, 'time_step': 0.003938169364469597, 'init_value': -2.565736770629883, 'ave_value': -1.7937402007849634, 'soft_opc': nan} step=5478




2022-04-20 16:09.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.46 [info     ] FQE_20220420160922: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015873075967811677, 'time_algorithm_update': 0.0035881005137799733, 'loss': 0.020914348376724106, 'time_step': 0.0038181040660444513, 'init_value': -2.5400500297546387, 'ave_value': -1.7649744038612725, 'soft_opc': nan} step=5644




2022-04-20 16:09.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.46 [info     ] FQE_20220420160922: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015752861298710467, 'time_algorithm_update': 0.0036618551575993918, 'loss': 0.02253386993045324, 'time_step': 0.0038947929818946197, 'init_value': -2.7341532707214355, 'ave_value': -1.921358559028925, 'soft_opc': nan} step=5810




2022-04-20 16:09.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.47 [info     ] FQE_20220420160922: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015917312668030522, 'time_algorithm_update': 0.0035252111503876835, 'loss': 0.023839271529184945, 'time_step': 0.0037580110940588526, 'init_value': -2.7535793781280518, 'ave_value': -1.9554976325347886, 'soft_opc': nan} step=5976




2022-04-20 16:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.48 [info     ] FQE_20220420160922: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016062518200242375, 'time_algorithm_update': 0.0035668396088014164, 'loss': 0.024775262501270295, 'time_step': 0.003803620855492282, 'init_value': -2.812387228012085, 'ave_value': -1.9825443944918948, 'soft_opc': nan} step=6142




2022-04-20 16:09.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.49 [info     ] FQE_20220420160922: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001567788870937853, 'time_algorithm_update': 0.0035720460386161343, 'loss': 0.025704956554572087, 'time_step': 0.0038017968097365045, 'init_value': -2.9830093383789062, 'ave_value': -2.1128574961544695, 'soft_opc': nan} step=6308




2022-04-20 16:09.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.49 [info     ] FQE_20220420160922: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015587117298539863, 'time_algorithm_update': 0.0035549158073333374, 'loss': 0.027078447385174953, 'time_step': 0.003777960696852351, 'init_value': -3.0262365341186523, 'ave_value': -2.1442770310685977, 'soft_opc': nan} step=6474




2022-04-20 16:09.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.50 [info     ] FQE_20220420160922: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001609109970460455, 'time_algorithm_update': 0.003556534468409527, 'loss': 0.027686534894080496, 'time_step': 0.0037891204098621047, 'init_value': -3.17952036857605, 'ave_value': -2.220974701788981, 'soft_opc': nan} step=6640




2022-04-20 16:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.51 [info     ] FQE_20220420160922: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001594517604414239, 'time_algorithm_update': 0.0036920179803687407, 'loss': 0.029803192001032496, 'time_step': 0.0039220947817147495, 'init_value': -3.2010552883148193, 'ave_value': -2.256912417109082, 'soft_opc': nan} step=6806




2022-04-20 16:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.51 [info     ] FQE_20220420160922: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015728732189500188, 'time_algorithm_update': 0.003469283322253859, 'loss': 0.030796197645579386, 'time_step': 0.003699519548071436, 'init_value': -3.322415351867676, 'ave_value': -2.3622782537176668, 'soft_opc': nan} step=6972




2022-04-20 16:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.52 [info     ] FQE_20220420160922: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015721263655697006, 'time_algorithm_update': 0.0036131933511021628, 'loss': 0.03343636544522302, 'time_step': 0.00384113013026226, 'init_value': -3.3984885215759277, 'ave_value': -2.4095193200887324, 'soft_opc': nan} step=7138




2022-04-20 16:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.53 [info     ] FQE_20220420160922: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001567070742687547, 'time_algorithm_update': 0.003492109746818083, 'loss': 0.03377769930787519, 'time_step': 0.003723914364734328, 'init_value': -3.4623630046844482, 'ave_value': -2.4500669209941015, 'soft_opc': nan} step=7304




2022-04-20 16:09.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.53 [info     ] FQE_20220420160922: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015958964106548265, 'time_algorithm_update': 0.003656269556068512, 'loss': 0.03501702096812562, 'time_step': 0.0038903247879212162, 'init_value': -3.638848304748535, 'ave_value': -2.598042905303809, 'soft_opc': nan} step=7470




2022-04-20 16:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.54 [info     ] FQE_20220420160922: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016141512307776026, 'time_algorithm_update': 0.0035441481923482506, 'loss': 0.03699628359228597, 'time_step': 0.0037761251610445687, 'init_value': -3.6179046630859375, 'ave_value': -2.5329754761235606, 'soft_opc': nan} step=7636




2022-04-20 16:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.55 [info     ] FQE_20220420160922: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016045139496584973, 'time_algorithm_update': 0.003547287849058588, 'loss': 0.03792648563155882, 'time_step': 0.003781301429472774, 'init_value': -3.599799633026123, 'ave_value': -2.523385177902575, 'soft_opc': nan} step=7802




2022-04-20 16:09.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.56 [info     ] FQE_20220420160922: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001584090382219797, 'time_algorithm_update': 0.0035050015851675748, 'loss': 0.03778962644394936, 'time_step': 0.0037353742553527095, 'init_value': -3.704338550567627, 'ave_value': -2.652646570323824, 'soft_opc': nan} step=7968




2022-04-20 16:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.56 [info     ] FQE_20220420160922: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001575932445296322, 'time_algorithm_update': 0.003668858344296375, 'loss': 0.039013161683730015, 'time_step': 0.0038991721279649848, 'init_value': -3.7770824432373047, 'ave_value': -2.6409073407635897, 'soft_opc': nan} step=8134




2022-04-20 16:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.57 [info     ] FQE_20220420160922: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016182301992393402, 'time_algorithm_update': 0.003528306283146502, 'loss': 0.039052019418912925, 'time_step': 0.003762094371290092, 'init_value': -3.740048408508301, 'ave_value': -2.6620199024358446, 'soft_opc': nan} step=8300




2022-04-20 16:09.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160922/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:09.57 [info     ] Directory is created at d3rlpy_logs/FQE_20220420160957
2022-04-20 16:09.57 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:09.57 [debug    ] Building models...
2022-04-20 16:09.57 [debug    ] Models have been built.
2022-04-20 16:09.57 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420160957/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:09.59 [info     ] FQE_20220420160957: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015900509302006213, 'time_algorithm_update': 0.0035098181214443473, 'loss': 0.029799914344885322, 'time_step': 0.0037426317847052285, 'init_value': -1.097383737564087, 'ave_value': -1.0760917174050937, 'soft_opc': nan} step=344




2022-04-20 16:09.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.00 [info     ] FQE_20220420160957: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016347336214642193, 'time_algorithm_update': 0.0034595041774040046, 'loss': 0.02371975800650584, 'time_step': 0.0036908907945766008, 'init_value': -1.779991626739502, 'ave_value': -1.738423429711445, 'soft_opc': nan} step=688




2022-04-20 16:10.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.02 [info     ] FQE_20220420160957: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015971272490745368, 'time_algorithm_update': 0.0035144187683282895, 'loss': 0.027060806006734626, 'time_step': 0.00374942047651424, 'init_value': -2.723331928253174, 'ave_value': -2.697025533322547, 'soft_opc': nan} step=1032




2022-04-20 16:10.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.03 [info     ] FQE_20220420160957: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001653938792472662, 'time_algorithm_update': 0.003575090059014254, 'loss': 0.028993584784180966, 'time_step': 0.003816231045612069, 'init_value': -3.292616128921509, 'ave_value': -3.2931321828819073, 'soft_opc': nan} step=1376




2022-04-20 16:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.04 [info     ] FQE_20220420160957: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016276573025903037, 'time_algorithm_update': 0.003492137720418531, 'loss': 0.035179571990400206, 'time_step': 0.0037292034126991448, 'init_value': -4.001494407653809, 'ave_value': -4.06273431128881, 'soft_opc': nan} step=1720




2022-04-20 16:10.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.06 [info     ] FQE_20220420160957: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016533358152522596, 'time_algorithm_update': 0.003528737051542415, 'loss': 0.04266401150718678, 'time_step': 0.003769464271013127, 'init_value': -4.442788600921631, 'ave_value': -4.593316865895732, 'soft_opc': nan} step=2064




2022-04-20 16:10.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.07 [info     ] FQE_20220420160957: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016554912855458814, 'time_algorithm_update': 0.003547818854797718, 'loss': 0.05034564019364933, 'time_step': 0.003784798605497493, 'init_value': -5.012277126312256, 'ave_value': -5.310838540848176, 'soft_opc': nan} step=2408




2022-04-20 16:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.09 [info     ] FQE_20220420160957: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016383792078772256, 'time_algorithm_update': 0.003471615009529646, 'loss': 0.06191229402509973, 'time_step': 0.0037086259487063384, 'init_value': -5.468863010406494, 'ave_value': -5.903427600375637, 'soft_opc': nan} step=2752




2022-04-20 16:10.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.10 [info     ] FQE_20220420160957: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001651658568271371, 'time_algorithm_update': 0.0035804468531941257, 'loss': 0.07141543644328796, 'time_step': 0.00382112417110177, 'init_value': -5.953042030334473, 'ave_value': -6.5142837146515244, 'soft_opc': nan} step=3096




2022-04-20 16:10.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.11 [info     ] FQE_20220420160957: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016711340394130972, 'time_algorithm_update': 0.0035635163617688554, 'loss': 0.08949688064519229, 'time_step': 0.0038093058175818865, 'init_value': -6.707399845123291, 'ave_value': -7.522367710925152, 'soft_opc': nan} step=3440




2022-04-20 16:10.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.13 [info     ] FQE_20220420160957: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016178363977476608, 'time_algorithm_update': 0.0035325545211170994, 'loss': 0.10154226657847852, 'time_step': 0.003769397735595703, 'init_value': -7.018280029296875, 'ave_value': -7.933808856945787, 'soft_opc': nan} step=3784




2022-04-20 16:10.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.14 [info     ] FQE_20220420160957: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016801509746285372, 'time_algorithm_update': 0.003578215144401373, 'loss': 0.11787600450889142, 'time_step': 0.003821388926616935, 'init_value': -7.723735809326172, 'ave_value': -8.756225621700601, 'soft_opc': nan} step=4128




2022-04-20 16:10.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.16 [info     ] FQE_20220420160957: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017293317373408827, 'time_algorithm_update': 0.003637096216512281, 'loss': 0.13464439980317514, 'time_step': 0.0038859144199726195, 'init_value': -8.143511772155762, 'ave_value': -9.260255993120708, 'soft_opc': nan} step=4472




2022-04-20 16:10.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.17 [info     ] FQE_20220420160957: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001635884129723837, 'time_algorithm_update': 0.003560343454050463, 'loss': 0.1498786134558708, 'time_step': 0.0037977875665176747, 'init_value': -8.712980270385742, 'ave_value': -9.987136803111083, 'soft_opc': nan} step=4816




2022-04-20 16:10.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.19 [info     ] FQE_20220420160957: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016577299251112828, 'time_algorithm_update': 0.00359040568041247, 'loss': 0.16765960312426784, 'time_step': 0.003827417312666427, 'init_value': -9.101008415222168, 'ave_value': -10.54750639429339, 'soft_opc': nan} step=5160




2022-04-20 16:10.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.20 [info     ] FQE_20220420160957: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016852173694344454, 'time_algorithm_update': 0.0036266785721446194, 'loss': 0.18827220938320077, 'time_step': 0.00387083652407624, 'init_value': -9.587179183959961, 'ave_value': -11.162116841016115, 'soft_opc': nan} step=5504




2022-04-20 16:10.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.21 [info     ] FQE_20220420160957: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016460099885630053, 'time_algorithm_update': 0.0035412457100180692, 'loss': 0.20479603146939257, 'time_step': 0.0037789552710777107, 'init_value': -9.65544319152832, 'ave_value': -11.351014918208474, 'soft_opc': nan} step=5848




2022-04-20 16:10.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.23 [info     ] FQE_20220420160957: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016483595204907795, 'time_algorithm_update': 0.0034528936064520547, 'loss': 0.22522480455552077, 'time_step': 0.0036895885023959848, 'init_value': -10.197732925415039, 'ave_value': -12.010802668453874, 'soft_opc': nan} step=6192




2022-04-20 16:10.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.24 [info     ] FQE_20220420160957: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016529268996660099, 'time_algorithm_update': 0.00353196055390114, 'loss': 0.23352910632397547, 'time_step': 0.0037718519221904665, 'init_value': -10.571847915649414, 'ave_value': -12.50264477764575, 'soft_opc': nan} step=6536




2022-04-20 16:10.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.26 [info     ] FQE_20220420160957: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016887312711671342, 'time_algorithm_update': 0.0034600593322931333, 'loss': 0.24273308276653635, 'time_step': 0.0037015267582826837, 'init_value': -10.682382583618164, 'ave_value': -12.839641271565458, 'soft_opc': nan} step=6880




2022-04-20 16:10.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.27 [info     ] FQE_20220420160957: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016215790149777434, 'time_algorithm_update': 0.0034700049910434457, 'loss': 0.25061602664699906, 'time_step': 0.003702430531036022, 'init_value': -10.976384162902832, 'ave_value': -13.302699718201483, 'soft_opc': nan} step=7224




2022-04-20 16:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.28 [info     ] FQE_20220420160957: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016625329505565555, 'time_algorithm_update': 0.0035303235054016113, 'loss': 0.2595071786483966, 'time_step': 0.003770006257434224, 'init_value': -11.05874252319336, 'ave_value': -13.526590651503831, 'soft_opc': nan} step=7568




2022-04-20 16:10.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.30 [info     ] FQE_20220420160957: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001691143180048743, 'time_algorithm_update': 0.003605672093324883, 'loss': 0.27228707737332686, 'time_step': 0.003849821728329326, 'init_value': -11.414541244506836, 'ave_value': -14.026988786333115, 'soft_opc': nan} step=7912




2022-04-20 16:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.31 [info     ] FQE_20220420160957: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016526704610780228, 'time_algorithm_update': 0.0034974473853443943, 'loss': 0.27467660731527693, 'time_step': 0.003738697185072788, 'init_value': -11.579957962036133, 'ave_value': -14.35953677405922, 'soft_opc': nan} step=8256




2022-04-20 16:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.33 [info     ] FQE_20220420160957: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001666365667831066, 'time_algorithm_update': 0.003572425176930982, 'loss': 0.2743179017114778, 'time_step': 0.0038148698418639425, 'init_value': -11.620487213134766, 'ave_value': -14.645968433441427, 'soft_opc': nan} step=8600




2022-04-20 16:10.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.34 [info     ] FQE_20220420160957: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016670448835505995, 'time_algorithm_update': 0.0035625668459160383, 'loss': 0.2803631274917618, 'time_step': 0.003802713959716087, 'init_value': -11.90804672241211, 'ave_value': -15.207475309211526, 'soft_opc': nan} step=8944




2022-04-20 16:10.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.36 [info     ] FQE_20220420160957: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001701178938843483, 'time_algorithm_update': 0.0035423622574917105, 'loss': 0.2760917131875663, 'time_step': 0.003788370725720428, 'init_value': -11.753396987915039, 'ave_value': -15.204219441534322, 'soft_opc': nan} step=9288




2022-04-20 16:10.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.37 [info     ] FQE_20220420160957: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016382198001063146, 'time_algorithm_update': 0.0035283849682918814, 'loss': 0.27959192296771634, 'time_step': 0.003768667925235837, 'init_value': -11.709973335266113, 'ave_value': -15.409222692907745, 'soft_opc': nan} step=9632




2022-04-20 16:10.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.38 [info     ] FQE_20220420160957: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016489070515299952, 'time_algorithm_update': 0.0035891394282496253, 'loss': 0.2739889052490769, 'time_step': 0.0038273556287898576, 'init_value': -11.684417724609375, 'ave_value': -15.493632139814263, 'soft_opc': nan} step=9976




2022-04-20 16:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.40 [info     ] FQE_20220420160957: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001679180666457775, 'time_algorithm_update': 0.0035088741502096485, 'loss': 0.274247000113043, 'time_step': 0.0037506499955820482, 'init_value': -11.893318176269531, 'ave_value': -15.831371589964842, 'soft_opc': nan} step=10320




2022-04-20 16:10.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.41 [info     ] FQE_20220420160957: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016744122948757437, 'time_algorithm_update': 0.0035599262215370354, 'loss': 0.27123240750218025, 'time_step': 0.0038030251514079958, 'init_value': -12.058830261230469, 'ave_value': -16.144099496907412, 'soft_opc': nan} step=10664




2022-04-20 16:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.43 [info     ] FQE_20220420160957: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001659649749134862, 'time_algorithm_update': 0.0035954339559688127, 'loss': 0.2711093783118697, 'time_step': 0.00383909497150155, 'init_value': -11.898712158203125, 'ave_value': -16.294546506468308, 'soft_opc': nan} step=11008




2022-04-20 16:10.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.44 [info     ] FQE_20220420160957: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016085353008536406, 'time_algorithm_update': 0.0035597397837527963, 'loss': 0.26407671071112504, 'time_step': 0.0037942556447761004, 'init_value': -11.685124397277832, 'ave_value': -16.15763534991876, 'soft_opc': nan} step=11352




2022-04-20 16:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.46 [info     ] FQE_20220420160957: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016834916070450183, 'time_algorithm_update': 0.003996126180471376, 'loss': 0.2543902133569814, 'time_step': 0.0042389582755953765, 'init_value': -11.633183479309082, 'ave_value': -16.24768300036174, 'soft_opc': nan} step=11696




2022-04-20 16:10.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.48 [info     ] FQE_20220420160957: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016936174658841864, 'time_algorithm_update': 0.005136726900588634, 'loss': 0.25372592194633947, 'time_step': 0.0053816513959751575, 'init_value': -11.802026748657227, 'ave_value': -16.462978113076215, 'soft_opc': nan} step=12040




2022-04-20 16:10.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.50 [info     ] FQE_20220420160957: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001697124436844227, 'time_algorithm_update': 0.004997895207515982, 'loss': 0.2503536739982231, 'time_step': 0.005244252293608909, 'init_value': -11.881420135498047, 'ave_value': -16.549667975024597, 'soft_opc': nan} step=12384




2022-04-20 16:10.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.52 [info     ] FQE_20220420160957: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017410170200259187, 'time_algorithm_update': 0.004972638778908308, 'loss': 0.24989506137167472, 'time_step': 0.005221347476160804, 'init_value': -11.859207153320312, 'ave_value': -16.672804259944723, 'soft_opc': nan} step=12728




2022-04-20 16:10.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.54 [info     ] FQE_20220420160957: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017349803170492483, 'time_algorithm_update': 0.005023865505706432, 'loss': 0.2543999321791235, 'time_step': 0.005271867957226065, 'init_value': -12.30782699584961, 'ave_value': -17.020860368573118, 'soft_opc': nan} step=13072




2022-04-20 16:10.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.55 [info     ] FQE_20220420160957: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016915520956349927, 'time_algorithm_update': 0.004784430875334629, 'loss': 0.2551791844510495, 'time_step': 0.005031010439229566, 'init_value': -12.721427917480469, 'ave_value': -17.512660683806974, 'soft_opc': nan} step=13416




2022-04-20 16:10.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.57 [info     ] FQE_20220420160957: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017604855603949967, 'time_algorithm_update': 0.0050765560116878775, 'loss': 0.2576658788656946, 'time_step': 0.005330237538315529, 'init_value': -12.823938369750977, 'ave_value': -17.44781133565414, 'soft_opc': nan} step=13760




2022-04-20 16:10.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.59 [info     ] FQE_20220420160957: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017408784045729527, 'time_algorithm_update': 0.005040768967118374, 'loss': 0.2572369611670458, 'time_step': 0.005293268103932225, 'init_value': -13.124554634094238, 'ave_value': -17.689219967723062, 'soft_opc': nan} step=14104




2022-04-20 16:10.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.01 [info     ] FQE_20220420160957: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017383348110110261, 'time_algorithm_update': 0.005027053661124651, 'loss': 0.262578621344251, 'time_step': 0.0052784591220146, 'init_value': -13.331636428833008, 'ave_value': -17.765662956245762, 'soft_opc': nan} step=14448




2022-04-20 16:11.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.03 [info     ] FQE_20220420160957: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017078463421311488, 'time_algorithm_update': 0.004541805317235547, 'loss': 0.2688854585948595, 'time_step': 0.004789256079252376, 'init_value': -13.559305191040039, 'ave_value': -18.056290332350862, 'soft_opc': nan} step=14792




2022-04-20 16:11.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.05 [info     ] FQE_20220420160957: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001750179501466973, 'time_algorithm_update': 0.005111571661261625, 'loss': 0.27538774988267484, 'time_step': 0.005363538514736087, 'init_value': -13.873368263244629, 'ave_value': -18.374713893907526, 'soft_opc': nan} step=15136




2022-04-20 16:11.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.07 [info     ] FQE_20220420160957: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017558280811753383, 'time_algorithm_update': 0.005116888256960137, 'loss': 0.28469569651345006, 'time_step': 0.005370201759560164, 'init_value': -14.148757934570312, 'ave_value': -18.439547430104703, 'soft_opc': nan} step=15480




2022-04-20 16:11.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.09 [info     ] FQE_20220420160957: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017313208690909453, 'time_algorithm_update': 0.005103174337120943, 'loss': 0.29403960553192815, 'time_step': 0.005354335834813672, 'init_value': -14.51588249206543, 'ave_value': -18.88938474918942, 'soft_opc': nan} step=15824




2022-04-20 16:11.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.11 [info     ] FQE_20220420160957: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017604786296223485, 'time_algorithm_update': 0.005101786796436753, 'loss': 0.3016707986419977, 'time_step': 0.005354682373446088, 'init_value': -14.630237579345703, 'ave_value': -18.917024087160826, 'soft_opc': nan} step=16168




2022-04-20 16:11.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.13 [info     ] FQE_20220420160957: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017036532246789268, 'time_algorithm_update': 0.004619976115781207, 'loss': 0.3078008577075982, 'time_step': 0.00486667558204296, 'init_value': -14.766862869262695, 'ave_value': -18.974155403904383, 'soft_opc': nan} step=16512




2022-04-20 16:11.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.15 [info     ] FQE_20220420160957: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001745695291563522, 'time_algorithm_update': 0.005054460015407828, 'loss': 0.3133826510991555, 'time_step': 0.0053076265856277114, 'init_value': -14.929309844970703, 'ave_value': -19.034737510927574, 'soft_opc': nan} step=16856




2022-04-20 16:11.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:11.17 [info     ] FQE_20220420160957: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00018485479576643124, 'time_algorithm_update': 0.005668244389600532, 'loss': 0.32303947190819093, 'time_step': 0.005929887294769287, 'init_value': -15.166177749633789, 'ave_value': -19.207149939137082, 'soft_opc': nan} step=17200




2022-04-20 16:11.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160957/model_17200.pt
search iteration:  9
using hyper params:  [0.008629976329157768, 0.00012336639894333922, 7.240777905640084e-05, 7]
2022-04-20 16:11.17 [debug    ] RoundIterator is selected.
2022-04-20 16:11.17 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420161117
2022-04-20 16:11.17 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:11.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:11.17 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:11.17 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0086299763291

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.21 [info     ] TD3PlusBC_20220420161117: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003995776873582985, 'time_algorithm_update': 0.00862126740795827, 'critic_loss': 106.26339853297897, 'actor_loss': 3.250951210832038, 'time_step': 0.009104018322905602, 'td_error': 2.3474533142516685, 'init_value': -9.162572860717773, 'ave_value': -6.56070504128933} step=342
2022-04-20 16:11.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.25 [info     ] TD3PlusBC_20220420161117: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0004066779599552266, 'time_algorithm_update': 0.009059436837135003, 'critic_loss': 33.523068478232936, 'actor_loss': 2.80123179697851, 'time_step': 0.009548820250215586, 'td_error': 4.208489344881104, 'init_value': -12.947166442871094, 'ave_value': -8.688022302574403} step=684
2022-04-20 16:11.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.29 [info     ] TD3PlusBC_20220420161117: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.000412118365192971, 'time_algorithm_update': 0.009053235165556969, 'critic_loss': 24.40956429710165, 'actor_loss': 2.6893079824614943, 'time_step': 0.009535321715282418, 'td_error': 3.2994724079615083, 'init_value': -17.853870391845703, 'ave_value': -11.749971286759184} step=1026
2022-04-20 16:11.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.32 [info     ] TD3PlusBC_20220420161117: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00040172903161299854, 'time_algorithm_update': 0.008539356683429918, 'critic_loss': 24.10755726050215, 'actor_loss': 2.6340950302213257, 'time_step': 0.009012845524570398, 'td_error': 2.605442757756409, 'init_value': -23.074420928955078, 'ave_value': -15.064388479375625} step=1368
2022-04-20 16:11.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.36 [info     ] TD3PlusBC_20220420161117: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00040419338739406297, 'time_algorithm_update': 0.00899795621459247, 'critic_loss': 25.13792167630112, 'actor_loss': 2.6041418376721834, 'time_step': 0.009475546970702055, 'td_error': 2.8456807551869874, 'init_value': -28.16583251953125, 'ave_value': -18.32498075580795} step=1710
2022-04-20 16:11.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.40 [info     ] TD3PlusBC_20220420161117: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0004041606222676952, 'time_algorithm_update': 0.008567554211755942, 'critic_loss': 26.90107986662123, 'actor_loss': 2.589840212760613, 'time_step': 0.009051416352478385, 'td_error': 3.316665826626156, 'init_value': -32.84429931640625, 'ave_value': -21.402508954646418} step=2052
2022-04-20 16:11.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.43 [info     ] TD3PlusBC_20220420161117: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00039858636800308675, 'time_algorithm_update': 0.009009581560279891, 'critic_loss': 28.9345248345046, 'actor_loss': 2.5824811611956324, 'time_step': 0.009484601299665128, 'td_error': 4.023809815656306, 'init_value': -37.48789596557617, 'ave_value': -24.396542585920542} step=2394
2022-04-20 16:11.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.47 [info     ] TD3PlusBC_20220420161117: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00040439625232540374, 'time_algorithm_update': 0.008907589996070192, 'critic_loss': 31.38016590876886, 'actor_loss': 2.579648526788455, 'time_step': 0.009385974783646433, 'td_error': 4.730703522450567, 'init_value': -41.770355224609375, 'ave_value': -27.277158708685487} step=2736
2022-04-20 16:11.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.51 [info     ] TD3PlusBC_20220420161117: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00040064081113938, 'time_algorithm_update': 0.008688221200864914, 'critic_loss': 34.047468771014294, 'actor_loss': 2.5770067889787995, 'time_step': 0.00916320190095065, 'td_error': 5.596032306130659, 'init_value': -45.9023323059082, 'ave_value': -29.97859967071345} step=3078
2022-04-20 16:11.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.55 [info     ] TD3PlusBC_20220420161117: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0004034788287871065, 'time_algorithm_update': 0.009050848888374909, 'critic_loss': 36.99912087122599, 'actor_loss': 2.576250168315151, 'time_step': 0.009529645679986964, 'td_error': 6.427429835806602, 'init_value': -49.722618103027344, 'ave_value': -32.58179425240117} step=3420
2022-04-20 16:11.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.58 [info     ] TD3PlusBC_20220420161117: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0004018879773323996, 'time_algorithm_update': 0.00865010769046538, 'critic_loss': 40.03707470810204, 'actor_loss': 2.571960877256784, 'time_step': 0.009131014695641591, 'td_error': 7.313256675196148, 'init_value': -53.44602584838867, 'ave_value': -35.020917413420804} step=3762
2022-04-20 16:11.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.02 [info     ] TD3PlusBC_20220420161117: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000397569951955338, 'time_algorithm_update': 0.008872600326761168, 'critic_loss': 43.38946099866901, 'actor_loss': 2.571048367093181, 'time_step': 0.0093478208396867, 'td_error': 8.136615219566558, 'init_value': -56.70356369018555, 'ave_value': -37.261314920361514} step=4104
2022-04-20 16:12.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.06 [info     ] TD3PlusBC_20220420161117: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0004042847114696837, 'time_algorithm_update': 0.009028913681967217, 'critic_loss': 46.656699721576175, 'actor_loss': 2.5703465952510722, 'time_step': 0.00951532871402495, 'td_error': 9.038606433465894, 'init_value': -59.91571044921875, 'ave_value': -39.461501905322244} step=4446
2022-04-20 16:12.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.09 [info     ] TD3PlusBC_20220420161117: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004067483701203999, 'time_algorithm_update': 0.008637409461172004, 'critic_loss': 50.03458630411249, 'actor_loss': 2.5687237218109487, 'time_step': 0.009122213424994932, 'td_error': 9.86008539370492, 'init_value': -62.66839599609375, 'ave_value': -41.42125776402552} step=4788
2022-04-20 16:12.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.13 [info     ] TD3PlusBC_20220420161117: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0004001974362378929, 'time_algorithm_update': 0.00901751002373054, 'critic_loss': 53.47802145439282, 'actor_loss': 2.5689691353959647, 'time_step': 0.009501134443004228, 'td_error': 10.69850003518892, 'init_value': -65.39338684082031, 'ave_value': -43.36967788108834} step=5130
2022-04-20 16:12.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.17 [info     ] TD3PlusBC_20220420161117: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003997387244687443, 'time_algorithm_update': 0.00864921187796788, 'critic_loss': 56.64953229580706, 'actor_loss': 2.5679012446375618, 'time_step': 0.009125402099207827, 'td_error': 11.518163959833009, 'init_value': -67.91905212402344, 'ave_value': -45.10391780359033} step=5472
2022-04-20 16:12.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.21 [info     ] TD3PlusBC_20220420161117: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003974660795334487, 'time_algorithm_update': 0.00901854177664595, 'critic_loss': 59.7194444254825, 'actor_loss': 2.566944133468539, 'time_step': 0.009493499471430192, 'td_error': 12.249420180256738, 'init_value': -70.26749420166016, 'ave_value': -46.82795303744339} step=5814
2022-04-20 16:12.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.24 [info     ] TD3PlusBC_20220420161117: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003985689397443805, 'time_algorithm_update': 0.008998876426652161, 'critic_loss': 62.87185772120604, 'actor_loss': 2.5658556996730337, 'time_step': 0.009479172745643303, 'td_error': 13.075077714956358, 'init_value': -72.7586898803711, 'ave_value': -48.52827941774466} step=6156
2022-04-20 16:12.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.28 [info     ] TD3PlusBC_20220420161117: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00039177122171859293, 'time_algorithm_update': 0.008456129079673722, 'critic_loss': 65.9746211314062, 'actor_loss': 2.5660907120732537, 'time_step': 0.008926588889451055, 'td_error': 13.81300565487147, 'init_value': -74.92898559570312, 'ave_value': -50.0933813390416} step=6498
2022-04-20 16:12.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.32 [info     ] TD3PlusBC_20220420161117: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00039845739888866045, 'time_algorithm_update': 0.008930472602621156, 'critic_loss': 68.89152148174263, 'actor_loss': 2.5656126605139837, 'time_step': 0.009411782549138655, 'td_error': 14.384169821609362, 'init_value': -76.63542175292969, 'ave_value': -51.343109347978945} step=6840
2022-04-20 16:12.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.35 [info     ] TD3PlusBC_20220420161117: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003975831974319547, 'time_algorithm_update': 0.008543489272134346, 'critic_loss': 71.67301412214313, 'actor_loss': 2.565619252578557, 'time_step': 0.00902136654881706, 'td_error': 15.055850260964823, 'init_value': -78.67726135253906, 'ave_value': -52.74122901409} step=7182
2022-04-20 16:12.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.39 [info     ] TD3PlusBC_20220420161117: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003986749035573145, 'time_algorithm_update': 0.009007630292435138, 'critic_loss': 74.23266727603666, 'actor_loss': 2.5648899050483926, 'time_step': 0.009486877430252165, 'td_error': 15.68375339766797, 'init_value': -80.3013687133789, 'ave_value': -54.02772110065269} step=7524
2022-04-20 16:12.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.43 [info     ] TD3PlusBC_20220420161117: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039931277782596344, 'time_algorithm_update': 0.009020784444976272, 'critic_loss': 76.61219539976956, 'actor_loss': 2.5648114932210824, 'time_step': 0.00950210554557934, 'td_error': 16.205089395268296, 'init_value': -81.73722076416016, 'ave_value': -55.15342903801374} step=7866
2022-04-20 16:12.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.47 [info     ] TD3PlusBC_20220420161117: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00040173321439508804, 'time_algorithm_update': 0.008617163401598122, 'critic_loss': 79.07701894414355, 'actor_loss': 2.564091463535153, 'time_step': 0.009096647563733552, 'td_error': 16.757800620527526, 'init_value': -83.36497497558594, 'ave_value': -56.30409000492747} step=8208
2022-04-20 16:12.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.50 [info     ] TD3PlusBC_20220420161117: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00040065963365878277, 'time_algorithm_update': 0.008948010310792086, 'critic_loss': 81.2872009054262, 'actor_loss': 2.563572102819967, 'time_step': 0.009432271907204077, 'td_error': 17.2893548753408, 'init_value': -84.73664855957031, 'ave_value': -57.3252668221456} step=8550
2022-04-20 16:12.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.54 [info     ] TD3PlusBC_20220420161117: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00039412612803498204, 'time_algorithm_update': 0.008587446826243262, 'critic_loss': 83.3469331083242, 'actor_loss': 2.564145123052318, 'time_step': 0.009064040685954847, 'td_error': 17.874920553598713, 'init_value': -86.02767944335938, 'ave_value': -58.32512639433137} step=8892
2022-04-20 16:12.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.58 [info     ] TD3PlusBC_20220420161117: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00040381623987565963, 'time_algorithm_update': 0.009058658142536008, 'critic_loss': 85.3873664342869, 'actor_loss': 2.5644054663808724, 'time_step': 0.009541617499457465, 'td_error': 18.263974775608645, 'init_value': -87.37373352050781, 'ave_value': -59.33167033801661} step=9234
2022-04-20 16:12.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.02 [info     ] TD3PlusBC_20220420161117: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00039628026081107515, 'time_algorithm_update': 0.008946560976798074, 'critic_loss': 87.13008902923406, 'actor_loss': 2.5640257269318343, 'time_step': 0.009420654229950486, 'td_error': 18.798043388092914, 'init_value': -88.51730346679688, 'ave_value': -60.25845983450729} step=9576
2022-04-20 16:13.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.05 [info     ] TD3PlusBC_20220420161117: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00040054948706375927, 'time_algorithm_update': 0.008565201396830598, 'critic_loss': 88.79004147178249, 'actor_loss': 2.563928040844655, 'time_step': 0.009046190663387901, 'td_error': 19.139473202726773, 'init_value': -89.55223083496094, 'ave_value': -61.0463641487844} step=9918
2022-04-20 16:13.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.09 [info     ] TD3PlusBC_20220420161117: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003983319154259754, 'time_algorithm_update': 0.008980158476801644, 'critic_loss': 90.38990691112495, 'actor_loss': 2.5647615033980697, 'time_step': 0.00946224572365744, 'td_error': 19.492010327337475, 'init_value': -90.34371185302734, 'ave_value': -61.72576203294031} step=10260
2022-04-20 16:13.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.13 [info     ] TD3PlusBC_20220420161117: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00040163701040702956, 'time_algorithm_update': 0.00867347410547803, 'critic_loss': 91.78347124690897, 'actor_loss': 2.5641087105399682, 'time_step': 0.009155537649901986, 'td_error': 19.80193497743951, 'init_value': -91.24900817871094, 'ave_value': -62.461308577607916} step=10602
2022-04-20 16:13.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.16 [info     ] TD3PlusBC_20220420161117: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00039963833769859626, 'time_algorithm_update': 0.00894744772660105, 'critic_loss': 93.32992507979186, 'actor_loss': 2.563872037575259, 'time_step': 0.009423980935972336, 'td_error': 20.088654934473546, 'init_value': -92.0748062133789, 'ave_value': -63.160035348746355} step=10944
2022-04-20 16:13.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.20 [info     ] TD3PlusBC_20220420161117: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00040292391302989937, 'time_algorithm_update': 0.008987389112773695, 'critic_loss': 94.60576570522018, 'actor_loss': 2.564593916050872, 'time_step': 0.00947030594474391, 'td_error': 20.494234363393797, 'init_value': -93.06037139892578, 'ave_value': -63.87630985519597} step=11286
2022-04-20 16:13.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.24 [info     ] TD3PlusBC_20220420161117: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00039281273445887873, 'time_algorithm_update': 0.008661459063925939, 'critic_loss': 95.91021506549322, 'actor_loss': 2.564811480672736, 'time_step': 0.009127996818364015, 'td_error': 20.88883852300863, 'init_value': -93.79125213623047, 'ave_value': -64.5002273316282} step=11628
2022-04-20 16:13.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.28 [info     ] TD3PlusBC_20220420161117: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00040103817543788264, 'time_algorithm_update': 0.008942717697188171, 'critic_loss': 97.03874149657132, 'actor_loss': 2.563919686434562, 'time_step': 0.009423861029552437, 'td_error': 21.140526199545278, 'init_value': -94.60172271728516, 'ave_value': -65.20094912300054} step=11970
2022-04-20 16:13.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.31 [info     ] TD3PlusBC_20220420161117: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00040131354192544146, 'time_algorithm_update': 0.008592987618251153, 'critic_loss': 98.05835761800844, 'actor_loss': 2.564154170409978, 'time_step': 0.009075227536653218, 'td_error': 21.38920267623957, 'init_value': -95.26593017578125, 'ave_value': -65.71139702447137} step=12312
2022-04-20 16:13.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.35 [info     ] TD3PlusBC_20220420161117: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0004007307409543043, 'time_algorithm_update': 0.008936520905522575, 'critic_loss': 98.98136278342085, 'actor_loss': 2.5646841428433245, 'time_step': 0.009414813671892846, 'td_error': 21.63182260099841, 'init_value': -95.94744873046875, 'ave_value': -66.26168360132019} step=12654
2022-04-20 16:13.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.39 [info     ] TD3PlusBC_20220420161117: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00039929256104586416, 'time_algorithm_update': 0.008940924677932472, 'critic_loss': 100.07819881773831, 'actor_loss': 2.5650321196394357, 'time_step': 0.009418677865413198, 'td_error': 21.950229256857828, 'init_value': -96.63362884521484, 'ave_value': -66.748979853639} step=12996
2022-04-20 16:13.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.42 [info     ] TD3PlusBC_20220420161117: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00040258441055030154, 'time_algorithm_update': 0.008721240779809784, 'critic_loss': 100.8795806037055, 'actor_loss': 2.5647439524444224, 'time_step': 0.00920338100857205, 'td_error': 22.039165906074203, 'init_value': -97.00860595703125, 'ave_value': -67.2196680217384} step=13338
2022-04-20 16:13.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.46 [info     ] TD3PlusBC_20220420161117: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003966657738936575, 'time_algorithm_update': 0.009028990366305525, 'critic_loss': 101.59232294629192, 'actor_loss': 2.5648065366243062, 'time_step': 0.009500845831040054, 'td_error': 22.2667175448124, 'init_value': -97.49270629882812, 'ave_value': -67.59574868433597} step=13680
2022-04-20 16:13.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.50 [info     ] TD3PlusBC_20220420161117: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00039595818659018354, 'time_algorithm_update': 0.008628041423552217, 'critic_loss': 102.3862472444947, 'actor_loss': 2.5648106775785746, 'time_step': 0.009097265221222102, 'td_error': 22.34509614941947, 'init_value': -97.72257995605469, 'ave_value': -67.90911178358076} step=14022
2022-04-20 16:13.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.54 [info     ] TD3PlusBC_20220420161117: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00039853478035731623, 'time_algorithm_update': 0.008950579933255737, 'critic_loss': 103.09201485929434, 'actor_loss': 2.564347625475878, 'time_step': 0.00941925020942911, 'td_error': 22.594155078880725, 'init_value': -98.29083251953125, 'ave_value': -68.40314396181655} step=14364
2022-04-20 16:13.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.57 [info     ] TD3PlusBC_20220420161117: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00039444541373448064, 'time_algorithm_update': 0.009013921893828096, 'critic_loss': 103.80887455410428, 'actor_loss': 2.5648505534344945, 'time_step': 0.009471291687056335, 'td_error': 22.838577022008128, 'init_value': -98.87440490722656, 'ave_value': -68.83562721239302} step=14706
2022-04-20 16:13.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.01 [info     ] TD3PlusBC_20220420161117: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00040087853258813334, 'time_algorithm_update': 0.0085621326290376, 'critic_loss': 104.30816214265879, 'actor_loss': 2.565476686633818, 'time_step': 0.009029127003853782, 'td_error': 22.888064376451325, 'init_value': -99.13392639160156, 'ave_value': -69.06994096894421} step=15048
2022-04-20 16:14.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.05 [info     ] TD3PlusBC_20220420161117: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003999144013165033, 'time_algorithm_update': 0.008881152024743154, 'critic_loss': 104.99575231228656, 'actor_loss': 2.5645835650594613, 'time_step': 0.009348833770082709, 'td_error': 23.05156261092291, 'init_value': -99.47245788574219, 'ave_value': -69.48055733602511} step=15390
2022-04-20 16:14.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.08 [info     ] TD3PlusBC_20220420161117: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.000400621291489629, 'time_algorithm_update': 0.008624565531635842, 'critic_loss': 105.38197190580313, 'actor_loss': 2.5653010279114485, 'time_step': 0.009090627146046065, 'td_error': 23.12120070451451, 'init_value': -99.69140625, 'ave_value': -69.66466262008181} step=15732
2022-04-20 16:14.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.12 [info     ] TD3PlusBC_20220420161117: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004029308843333819, 'time_algorithm_update': 0.00903100855866371, 'critic_loss': 105.9153877949854, 'actor_loss': 2.5650829557786907, 'time_step': 0.009500693856624135, 'td_error': 23.29914902854775, 'init_value': -100.15122985839844, 'ave_value': -70.09292926136908} step=16074
2022-04-20 16:14.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.16 [info     ] TD3PlusBC_20220420161117: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00040325574707566645, 'time_algorithm_update': 0.00901385287792362, 'critic_loss': 106.50314644484492, 'actor_loss': 2.566198494002136, 'time_step': 0.00948747696235166, 'td_error': 23.4056266871197, 'init_value': -100.59796142578125, 'ave_value': -70.41662364429543} step=16416
2022-04-20 16:14.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.19 [info     ] TD3PlusBC_20220420161117: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00040611816428558174, 'time_algorithm_update': 0.008600065582676938, 'critic_loss': 107.1993978734602, 'actor_loss': 2.5663623307880603, 'time_step': 0.009078986463490982, 'td_error': 23.585740426403614, 'init_value': -100.92584991455078, 'ave_value': -70.66760197118957} step=16758
2022-04-20 16:14.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.23 [info     ] TD3PlusBC_20220420161117: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0004008067281622636, 'time_algorithm_update': 0.00901955400991161, 'critic_loss': 107.61190774705675, 'actor_loss': 2.5659798181544966, 'time_step': 0.009486836299561617, 'td_error': 23.577405457465225, 'init_value': -101.04194641113281, 'ave_value': -70.89978523825346} step=17100
2022-04-20 16:14.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161117/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:14.24 [info     ] FQE_20220420161423: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001703112958425499, 'time_algorithm_update': 0.005064303616443312, 'loss': 0.008792233215757164, 'time_step': 0.005308730056486934, 'init_value': -0.2495371401309967, 'ave_value': -0.13202023981222966, 'soft_opc': nan} step=166




2022-04-20 16:14.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.25 [info     ] FQE_20220420161423: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016460217625261788, 'time_algorithm_update': 0.005054575851164669, 'loss': 0.006227459235352476, 'time_step': 0.005295614162123347, 'init_value': -0.4121527075767517, 'ave_value': -0.22921218670535465, 'soft_opc': nan} step=332




2022-04-20 16:14.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.26 [info     ] FQE_20220420161423: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001616779580173722, 'time_algorithm_update': 0.004329701504075384, 'loss': 0.005541419228875494, 'time_step': 0.00456395063055567, 'init_value': -0.4522031247615814, 'ave_value': -0.23326424743879487, 'soft_opc': nan} step=498




2022-04-20 16:14.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.27 [info     ] FQE_20220420161423: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016916085438555982, 'time_algorithm_update': 0.005015617393585573, 'loss': 0.005747269234530269, 'time_step': 0.005257549056087632, 'init_value': -0.570514440536499, 'ave_value': -0.2981754247099161, 'soft_opc': nan} step=664




2022-04-20 16:14.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.28 [info     ] FQE_20220420161423: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016727217708725528, 'time_algorithm_update': 0.005154313811336656, 'loss': 0.005456291353722742, 'time_step': 0.005395652299903962, 'init_value': -0.647151529788971, 'ave_value': -0.3383383980348102, 'soft_opc': nan} step=830




2022-04-20 16:14.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.29 [info     ] FQE_20220420161423: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016963625528726233, 'time_algorithm_update': 0.0050166816596525264, 'loss': 0.005250287381090583, 'time_step': 0.0052613982235092715, 'init_value': -0.6468671560287476, 'ave_value': -0.3255414764103186, 'soft_opc': nan} step=996




2022-04-20 16:14.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.30 [info     ] FQE_20220420161423: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016979424350232962, 'time_algorithm_update': 0.005113914788487446, 'loss': 0.0050984203986576705, 'time_step': 0.005357422024370676, 'init_value': -0.7119274139404297, 'ave_value': -0.36068282020571935, 'soft_opc': nan} step=1162




2022-04-20 16:14.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.31 [info     ] FQE_20220420161423: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001684039472097374, 'time_algorithm_update': 0.005045712712299393, 'loss': 0.004880738402548504, 'time_step': 0.005291308265134513, 'init_value': -0.7642203569412231, 'ave_value': -0.3788268529684157, 'soft_opc': nan} step=1328




2022-04-20 16:14.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.32 [info     ] FQE_20220420161423: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001690272825310029, 'time_algorithm_update': 0.0052365538585616885, 'loss': 0.004507292433731617, 'time_step': 0.00548255156321698, 'init_value': -0.822843074798584, 'ave_value': -0.41623062433440006, 'soft_opc': nan} step=1494




2022-04-20 16:14.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.33 [info     ] FQE_20220420161423: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001687213598963726, 'time_algorithm_update': 0.0051894949143191415, 'loss': 0.004974593879677445, 'time_step': 0.005434415426598974, 'init_value': -0.8761410713195801, 'ave_value': -0.4437041557393968, 'soft_opc': nan} step=1660




2022-04-20 16:14.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.34 [info     ] FQE_20220420161423: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016769156398543393, 'time_algorithm_update': 0.005105932074857046, 'loss': 0.005007739570861435, 'time_step': 0.005346200552331396, 'init_value': -0.929726243019104, 'ave_value': -0.46307866290413047, 'soft_opc': nan} step=1826




2022-04-20 16:14.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.35 [info     ] FQE_20220420161423: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016740287642881093, 'time_algorithm_update': 0.0047624570777617305, 'loss': 0.005458183818726789, 'time_step': 0.005003182284803276, 'init_value': -1.005598545074463, 'ave_value': -0.5177228339352049, 'soft_opc': nan} step=1992




2022-04-20 16:14.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.36 [info     ] FQE_20220420161423: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016583304807364223, 'time_algorithm_update': 0.004800476223589426, 'loss': 0.005927957161694361, 'time_step': 0.0050403741468866186, 'init_value': -1.0005059242248535, 'ave_value': -0.4761903540222897, 'soft_opc': nan} step=2158




2022-04-20 16:14.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.37 [info     ] FQE_20220420161423: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001638625041548028, 'time_algorithm_update': 0.005066624606948301, 'loss': 0.006208877377941397, 'time_step': 0.005304345165390566, 'init_value': -1.0500885248184204, 'ave_value': -0.4802219354706976, 'soft_opc': nan} step=2324




2022-04-20 16:14.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.38 [info     ] FQE_20220420161423: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017103516911885826, 'time_algorithm_update': 0.0051054150225168255, 'loss': 0.006316532407528216, 'time_step': 0.005350140203912574, 'init_value': -1.1106950044631958, 'ave_value': -0.5054625281325734, 'soft_opc': nan} step=2490




2022-04-20 16:14.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.39 [info     ] FQE_20220420161423: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016589193459016732, 'time_algorithm_update': 0.005124560321669981, 'loss': 0.007408677283337856, 'time_step': 0.005362394344375794, 'init_value': -1.2112317085266113, 'ave_value': -0.5685703743626741, 'soft_opc': nan} step=2656




2022-04-20 16:14.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.40 [info     ] FQE_20220420161423: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016891956329345703, 'time_algorithm_update': 0.005074187933680523, 'loss': 0.008014337919899318, 'time_step': 0.005316431263843215, 'init_value': -1.2888685464859009, 'ave_value': -0.6175204776739283, 'soft_opc': nan} step=2822




2022-04-20 16:14.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.41 [info     ] FQE_20220420161423: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016608152044824808, 'time_algorithm_update': 0.004966633865632206, 'loss': 0.008866119171150523, 'time_step': 0.005206212939986263, 'init_value': -1.2829573154449463, 'ave_value': -0.5921975603729889, 'soft_opc': nan} step=2988




2022-04-20 16:14.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.42 [info     ] FQE_20220420161423: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016781651830098717, 'time_algorithm_update': 0.005045490092541798, 'loss': 0.010117947131659028, 'time_step': 0.005287907209741064, 'init_value': -1.3461147546768188, 'ave_value': -0.6131067878373706, 'soft_opc': nan} step=3154




2022-04-20 16:14.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.43 [info     ] FQE_20220420161423: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001727672944585961, 'time_algorithm_update': 0.005022024533834802, 'loss': 0.010957394794600242, 'time_step': 0.005269062088196536, 'init_value': -1.457563877105713, 'ave_value': -0.6809148974876079, 'soft_opc': nan} step=3320




2022-04-20 16:14.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.44 [info     ] FQE_20220420161423: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016713286020669592, 'time_algorithm_update': 0.00507756026394396, 'loss': 0.01161738331513816, 'time_step': 0.005317139338298017, 'init_value': -1.4973819255828857, 'ave_value': -0.6988930646191678, 'soft_opc': nan} step=3486




2022-04-20 16:14.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.44 [info     ] FQE_20220420161423: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016203271337302336, 'time_algorithm_update': 0.004408033497362252, 'loss': 0.012663722062113145, 'time_step': 0.004640721413026373, 'init_value': -1.5464112758636475, 'ave_value': -0.7275612438340915, 'soft_opc': nan} step=3652




2022-04-20 16:14.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.45 [info     ] FQE_20220420161423: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016892387206295887, 'time_algorithm_update': 0.0051669327609510304, 'loss': 0.01394979021349271, 'time_step': 0.005413118615207902, 'init_value': -1.6510009765625, 'ave_value': -0.8138202341843968, 'soft_opc': nan} step=3818




2022-04-20 16:14.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.46 [info     ] FQE_20220420161423: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017012745501047158, 'time_algorithm_update': 0.005100909485874406, 'loss': 0.014433970974326268, 'time_step': 0.005346186189766389, 'init_value': -1.7045646905899048, 'ave_value': -0.8239902726500421, 'soft_opc': nan} step=3984




2022-04-20 16:14.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.47 [info     ] FQE_20220420161423: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001702150666570089, 'time_algorithm_update': 0.00508673937923937, 'loss': 0.016386206087891955, 'time_step': 0.005330071391829525, 'init_value': -1.7706551551818848, 'ave_value': -0.8904562424750881, 'soft_opc': nan} step=4150




2022-04-20 16:14.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.48 [info     ] FQE_20220420161423: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016750054187085256, 'time_algorithm_update': 0.005078874438642019, 'loss': 0.017380833357897956, 'time_step': 0.005318246692059988, 'init_value': -1.8932044506072998, 'ave_value': -0.9741152506422353, 'soft_opc': nan} step=4316




2022-04-20 16:14.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.49 [info     ] FQE_20220420161423: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016711131635918674, 'time_algorithm_update': 0.005133636026497346, 'loss': 0.01875221897379484, 'time_step': 0.005376860319849956, 'init_value': -1.912582516670227, 'ave_value': -0.970707139270464, 'soft_opc': nan} step=4482




2022-04-20 16:14.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.50 [info     ] FQE_20220420161423: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001746114478053817, 'time_algorithm_update': 0.005032346909304699, 'loss': 0.019782123078047914, 'time_step': 0.005283724830811282, 'init_value': -1.9313782453536987, 'ave_value': -0.9757529491348914, 'soft_opc': nan} step=4648




2022-04-20 16:14.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.51 [info     ] FQE_20220420161423: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001645562160445983, 'time_algorithm_update': 0.005073073398636048, 'loss': 0.019138411254094666, 'time_step': 0.005311211907719991, 'init_value': -2.0422017574310303, 'ave_value': -1.039769219638159, 'soft_opc': nan} step=4814




2022-04-20 16:14.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.52 [info     ] FQE_20220420161423: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016503879822880388, 'time_algorithm_update': 0.005021890961980245, 'loss': 0.022288930833098446, 'time_step': 0.0052598212138716, 'init_value': -2.032484531402588, 'ave_value': -1.0236890977164645, 'soft_opc': nan} step=4980




2022-04-20 16:14.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.53 [info     ] FQE_20220420161423: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016177418720291322, 'time_algorithm_update': 0.00405639625457396, 'loss': 0.023222145533290165, 'time_step': 0.004289311098765178, 'init_value': -2.112516164779663, 'ave_value': -1.055673025916792, 'soft_opc': nan} step=5146




2022-04-20 16:14.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.54 [info     ] FQE_20220420161423: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016585890069065323, 'time_algorithm_update': 0.0050740371267479585, 'loss': 0.025279198598582298, 'time_step': 0.005308445677699813, 'init_value': -2.112335205078125, 'ave_value': -1.0174522040806173, 'soft_opc': nan} step=5312




2022-04-20 16:14.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.55 [info     ] FQE_20220420161423: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016555728682552474, 'time_algorithm_update': 0.005132237112665751, 'loss': 0.02609192300728152, 'time_step': 0.0053731734494128856, 'init_value': -2.0822105407714844, 'ave_value': -0.9846831796212575, 'soft_opc': nan} step=5478




2022-04-20 16:14.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.56 [info     ] FQE_20220420161423: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00017319673515227903, 'time_algorithm_update': 0.005083693079201572, 'loss': 0.028930123436758406, 'time_step': 0.005332356475921999, 'init_value': -2.0872178077697754, 'ave_value': -0.9726346331371649, 'soft_opc': nan} step=5644




2022-04-20 16:14.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.57 [info     ] FQE_20220420161423: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016906749771302006, 'time_algorithm_update': 0.005087858223053346, 'loss': 0.030346229969781924, 'time_step': 0.0053286236452769085, 'init_value': -2.3325321674346924, 'ave_value': -1.1670812760454576, 'soft_opc': nan} step=5810




2022-04-20 16:14.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.58 [info     ] FQE_20220420161423: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016722047185323326, 'time_algorithm_update': 0.004994525966874088, 'loss': 0.03258373683489612, 'time_step': 0.005239733730454043, 'init_value': -2.325932502746582, 'ave_value': -1.1327675958608723, 'soft_opc': nan} step=5976




2022-04-20 16:14.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:14.59 [info     ] FQE_20220420161423: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016670054700001176, 'time_algorithm_update': 0.005047124552439494, 'loss': 0.032805690775395385, 'time_step': 0.005287655864853457, 'init_value': -2.4061803817749023, 'ave_value': -1.1857657652297937, 'soft_opc': nan} step=6142




2022-04-20 16:14.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.00 [info     ] FQE_20220420161423: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001675421933093703, 'time_algorithm_update': 0.005021017718027873, 'loss': 0.03526930487434189, 'time_step': 0.00526458671294063, 'init_value': -2.4616875648498535, 'ave_value': -1.2208990509512725, 'soft_opc': nan} step=6308




2022-04-20 16:15.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.01 [info     ] FQE_20220420161423: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001729841691901885, 'time_algorithm_update': 0.005055082849709384, 'loss': 0.038510355607505486, 'time_step': 0.005306430609829454, 'init_value': -2.416351795196533, 'ave_value': -1.1494461458520377, 'soft_opc': nan} step=6474




2022-04-20 16:15.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.02 [info     ] FQE_20220420161423: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016839676592723433, 'time_algorithm_update': 0.0041795581220144245, 'loss': 0.03926706341016723, 'time_step': 0.004419433065207608, 'init_value': -2.4824881553649902, 'ave_value': -1.1848165365137362, 'soft_opc': nan} step=6640




2022-04-20 16:15.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.02 [info     ] FQE_20220420161423: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016997952059090855, 'time_algorithm_update': 0.005001866673848715, 'loss': 0.042011996274617644, 'time_step': 0.005245327949523926, 'init_value': -2.6312451362609863, 'ave_value': -1.2854844981418536, 'soft_opc': nan} step=6806




2022-04-20 16:15.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.03 [info     ] FQE_20220420161423: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017286639615713832, 'time_algorithm_update': 0.005196094512939453, 'loss': 0.04346457898128688, 'time_step': 0.005442965461547117, 'init_value': -2.630267381668091, 'ave_value': -1.2909252762207175, 'soft_opc': nan} step=6972




2022-04-20 16:15.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.04 [info     ] FQE_20220420161423: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016958024128373847, 'time_algorithm_update': 0.005195537245417216, 'loss': 0.04567932561516807, 'time_step': 0.005439738193190241, 'init_value': -2.7014899253845215, 'ave_value': -1.3212327563984168, 'soft_opc': nan} step=7138




2022-04-20 16:15.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.05 [info     ] FQE_20220420161423: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016780071947948043, 'time_algorithm_update': 0.005066545612840767, 'loss': 0.047770933647842025, 'time_step': 0.005307905645255583, 'init_value': -2.8245668411254883, 'ave_value': -1.4011800538731722, 'soft_opc': nan} step=7304




2022-04-20 16:15.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.06 [info     ] FQE_20220420161423: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016946964953319137, 'time_algorithm_update': 0.005161183426179081, 'loss': 0.04950812433184271, 'time_step': 0.005405078451317477, 'init_value': -2.892333745956421, 'ave_value': -1.4005717304325453, 'soft_opc': nan} step=7470




2022-04-20 16:15.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.07 [info     ] FQE_20220420161423: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.000163377049457596, 'time_algorithm_update': 0.0050826043967741085, 'loss': 0.05325509891957494, 'time_step': 0.005319315266896443, 'init_value': -2.898245334625244, 'ave_value': -1.422572934661094, 'soft_opc': nan} step=7636




2022-04-20 16:15.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.08 [info     ] FQE_20220420161423: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016648941729442183, 'time_algorithm_update': 0.005157497991998512, 'loss': 0.05529420730287591, 'time_step': 0.005401242210204343, 'init_value': -2.9403457641601562, 'ave_value': -1.4067401532822095, 'soft_opc': nan} step=7802




2022-04-20 16:15.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.09 [info     ] FQE_20220420161423: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001671529677977045, 'time_algorithm_update': 0.005124100719589785, 'loss': 0.05632290813555739, 'time_step': 0.005366858229579696, 'init_value': -3.0407121181488037, 'ave_value': -1.4861100070413438, 'soft_opc': nan} step=7968




2022-04-20 16:15.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.10 [info     ] FQE_20220420161423: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016521545777837914, 'time_algorithm_update': 0.004547927753034845, 'loss': 0.05847888241146119, 'time_step': 0.004783656223710761, 'init_value': -3.155582904815674, 'ave_value': -1.5707448205667305, 'soft_opc': nan} step=8134




2022-04-20 16:15.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:15.11 [info     ] FQE_20220420161423: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016575549022260918, 'time_algorithm_update': 0.00488447855754071, 'loss': 0.061126241600701965, 'time_step': 0.005123688513974109, 'init_value': -3.1719167232513428, 'ave_value': -1.602746074639999, 'soft_opc': nan} step=8300




2022-04-20 16:15.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161423/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:15.11 [info     ] Directory is created at d3rlpy_logs/FQE_20220420161511
2022-04-20 16:15.11 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:15.11 [debug    ] Building models...
2022-04-20 16:15.11 [debug    ] Models have been built.
2022-04-20 16:15.11 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420161511/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:15.13 [info     ] FQE_20220420161511: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001684800000257895, 'time_algorithm_update': 0.005063967637612786, 'loss': 0.028826678491814035, 'time_step': 0.005308222434890103, 'init_value': -1.0570061206817627, 'ave_value': -1.0511216725430126, 'soft_opc': nan} step=355




2022-04-20 16:15.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.15 [info     ] FQE_20220420161511: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016994274837869993, 'time_algorithm_update': 0.005039915568392042, 'loss': 0.026230035968859432, 'time_step': 0.005286428961955325, 'init_value': -2.0794947147369385, 'ave_value': -2.0592413698691043, 'soft_opc': nan} step=710




2022-04-20 16:15.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.18 [info     ] FQE_20220420161511: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00017122886550258583, 'time_algorithm_update': 0.005118703842163086, 'loss': 0.030355851008543665, 'time_step': 0.005364371689272599, 'init_value': -2.8382420539855957, 'ave_value': -2.778388648289358, 'soft_opc': nan} step=1065




2022-04-20 16:15.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.19 [info     ] FQE_20220420161511: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016918249533210002, 'time_algorithm_update': 0.004609126776037082, 'loss': 0.03599152323120917, 'time_step': 0.004852693853243975, 'init_value': -3.7769367694854736, 'ave_value': -3.6872354448436155, 'soft_opc': nan} step=1420




2022-04-20 16:15.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.21 [info     ] FQE_20220420161511: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001705942019610338, 'time_algorithm_update': 0.004996805459680692, 'loss': 0.042522930109899645, 'time_step': 0.005243948143972477, 'init_value': -4.564408302307129, 'ave_value': -4.441330569781641, 'soft_opc': nan} step=1775




2022-04-20 16:15.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.23 [info     ] FQE_20220420161511: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017083732175155424, 'time_algorithm_update': 0.005055692833913884, 'loss': 0.05380834915058714, 'time_step': 0.005305741538464184, 'init_value': -5.589487552642822, 'ave_value': -5.4063061267066805, 'soft_opc': nan} step=2130




2022-04-20 16:15.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.25 [info     ] FQE_20220420161511: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017282391937685685, 'time_algorithm_update': 0.005016086470912879, 'loss': 0.06381622472069633, 'time_step': 0.005267123773064412, 'init_value': -6.3024091720581055, 'ave_value': -6.070889724447527, 'soft_opc': nan} step=2485




2022-04-20 16:15.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.27 [info     ] FQE_20220420161511: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001688950498339156, 'time_algorithm_update': 0.0050082710427297675, 'loss': 0.07774379940205058, 'time_step': 0.005252637325877874, 'init_value': -7.238692283630371, 'ave_value': -6.928575469126525, 'soft_opc': nan} step=2840




2022-04-20 16:15.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.29 [info     ] FQE_20220420161511: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00016947195563517825, 'time_algorithm_update': 0.004614827330683319, 'loss': 0.09306681117856166, 'time_step': 0.004860661063395755, 'init_value': -7.970500946044922, 'ave_value': -7.643561558342003, 'soft_opc': nan} step=3195




2022-04-20 16:15.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.31 [info     ] FQE_20220420161511: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016937188699211873, 'time_algorithm_update': 0.00499855229552363, 'loss': 0.10863526277244091, 'time_step': 0.005244282601584851, 'init_value': -8.603606224060059, 'ave_value': -8.25719245963585, 'soft_opc': nan} step=3550




2022-04-20 16:15.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.33 [info     ] FQE_20220420161511: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016962239440058318, 'time_algorithm_update': 0.005020097947456468, 'loss': 0.12284162897144404, 'time_step': 0.005266041822836433, 'init_value': -9.437299728393555, 'ave_value': -9.074828712884313, 'soft_opc': nan} step=3905




2022-04-20 16:15.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.35 [info     ] FQE_20220420161511: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017274265557947293, 'time_algorithm_update': 0.005087490484748088, 'loss': 0.14083059984193722, 'time_step': 0.005334136855434364, 'init_value': -9.928199768066406, 'ave_value': -9.500890733946013, 'soft_opc': nan} step=4260




2022-04-20 16:15.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.37 [info     ] FQE_20220420161511: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.0001677385518248652, 'time_algorithm_update': 0.004625814061769298, 'loss': 0.15854328874970824, 'time_step': 0.004865847842794069, 'init_value': -10.622491836547852, 'ave_value': -10.195512035355797, 'soft_opc': nan} step=4615




2022-04-20 16:15.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.39 [info     ] FQE_20220420161511: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00016725029743893045, 'time_algorithm_update': 0.005060408820568676, 'loss': 0.17481389893612392, 'time_step': 0.005301978554524167, 'init_value': -10.972823143005371, 'ave_value': -10.547893087406724, 'soft_opc': nan} step=4970




2022-04-20 16:15.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.41 [info     ] FQE_20220420161511: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001682133741781745, 'time_algorithm_update': 0.005003778699418189, 'loss': 0.19173883349228074, 'time_step': 0.005250666846691723, 'init_value': -11.691787719726562, 'ave_value': -11.306276986270452, 'soft_opc': nan} step=5325




2022-04-20 16:15.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.43 [info     ] FQE_20220420161511: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017083530694666043, 'time_algorithm_update': 0.005105695590166978, 'loss': 0.2147084652433093, 'time_step': 0.0053520404117208134, 'init_value': -11.966479301452637, 'ave_value': -11.6625749669528, 'soft_opc': nan} step=5680




2022-04-20 16:15.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.45 [info     ] FQE_20220420161511: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00017083329214176662, 'time_algorithm_update': 0.004987691825544331, 'loss': 0.23681412321371093, 'time_step': 0.0052321978018317426, 'init_value': -12.508039474487305, 'ave_value': -12.170428680656347, 'soft_opc': nan} step=6035




2022-04-20 16:15.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.47 [info     ] FQE_20220420161511: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016759751548229808, 'time_algorithm_update': 0.0048576274388273, 'loss': 0.26036500145327035, 'time_step': 0.005101700903664173, 'init_value': -12.703093528747559, 'ave_value': -12.506397049109113, 'soft_opc': nan} step=6390




2022-04-20 16:15.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.49 [info     ] FQE_20220420161511: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001729495088819047, 'time_algorithm_update': 0.005114295449055417, 'loss': 0.28613047970222755, 'time_step': 0.005363923059382909, 'init_value': -13.220694541931152, 'ave_value': -13.034241691735145, 'soft_opc': nan} step=6745




2022-04-20 16:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.51 [info     ] FQE_20220420161511: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017257609837491747, 'time_algorithm_update': 0.005057664656303298, 'loss': 0.3106340095505748, 'time_step': 0.0053037932221318635, 'init_value': -13.79710865020752, 'ave_value': -13.594271604008638, 'soft_opc': nan} step=7100




2022-04-20 16:15.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.53 [info     ] FQE_20220420161511: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001681079327220648, 'time_algorithm_update': 0.0050524866077261915, 'loss': 0.3387043807498166, 'time_step': 0.005297149067193689, 'init_value': -14.731602668762207, 'ave_value': -14.558936628106709, 'soft_opc': nan} step=7455




2022-04-20 16:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.55 [info     ] FQE_20220420161511: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016808375506333903, 'time_algorithm_update': 0.004679261462789186, 'loss': 0.3593884593794044, 'time_step': 0.004919455085002201, 'init_value': -15.159859657287598, 'ave_value': -14.9442305329909, 'soft_opc': nan} step=7810




2022-04-20 16:15.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.57 [info     ] FQE_20220420161511: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00017255662192761057, 'time_algorithm_update': 0.005021455254353268, 'loss': 0.3914945162727799, 'time_step': 0.005269822268418863, 'init_value': -15.449395179748535, 'ave_value': -15.244007907768033, 'soft_opc': nan} step=8165




2022-04-20 16:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:15.59 [info     ] FQE_20220420161511: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016986752899599746, 'time_algorithm_update': 0.00513159389227209, 'loss': 0.410691988195332, 'time_step': 0.005377069661315058, 'init_value': -16.19412612915039, 'ave_value': -15.91380380933656, 'soft_opc': nan} step=8520




2022-04-20 16:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.01 [info     ] FQE_20220420161511: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016945247918787138, 'time_algorithm_update': 0.005026343171025666, 'loss': 0.43535314143543513, 'time_step': 0.005270694007336254, 'init_value': -16.615190505981445, 'ave_value': -16.199995093542405, 'soft_opc': nan} step=8875




2022-04-20 16:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.03 [info     ] FQE_20220420161511: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016792660028162136, 'time_algorithm_update': 0.004666445288859623, 'loss': 0.45937312161418753, 'time_step': 0.004910718890982615, 'init_value': -17.415302276611328, 'ave_value': -17.07468071193884, 'soft_opc': nan} step=9230




2022-04-20 16:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.05 [info     ] FQE_20220420161511: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017288772153182768, 'time_algorithm_update': 0.005011149527321399, 'loss': 0.46899095815672004, 'time_step': 0.005259846297787948, 'init_value': -17.361377716064453, 'ave_value': -16.974686482522344, 'soft_opc': nan} step=9585




2022-04-20 16:16.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.07 [info     ] FQE_20220420161511: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001695834415059694, 'time_algorithm_update': 0.005039692596650459, 'loss': 0.4881036209545925, 'time_step': 0.005286257703539352, 'init_value': -18.027311325073242, 'ave_value': -17.451022917790244, 'soft_opc': nan} step=9940




2022-04-20 16:16.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.09 [info     ] FQE_20220420161511: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017100992337079117, 'time_algorithm_update': 0.005116481512365207, 'loss': 0.5038786866505381, 'time_step': 0.00536536498808525, 'init_value': -18.389894485473633, 'ave_value': -17.76220522442739, 'soft_opc': nan} step=10295




2022-04-20 16:16.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.11 [info     ] FQE_20220420161511: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017066874974210497, 'time_algorithm_update': 0.005109303434130172, 'loss': 0.5271217049637311, 'time_step': 0.005359007607043629, 'init_value': -19.01188850402832, 'ave_value': -18.16302743818769, 'soft_opc': nan} step=10650




2022-04-20 16:16.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.13 [info     ] FQE_20220420161511: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017233365018602827, 'time_algorithm_update': 0.004751207459140832, 'loss': 0.5357048400008763, 'time_step': 0.0049982782820580714, 'init_value': -19.280675888061523, 'ave_value': -18.646871679036984, 'soft_opc': nan} step=11005




2022-04-20 16:16.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.15 [info     ] FQE_20220420161511: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017216776458310408, 'time_algorithm_update': 0.005078458114409111, 'loss': 0.5376076153361462, 'time_step': 0.005328072964305609, 'init_value': -18.92042350769043, 'ave_value': -18.28835209564928, 'soft_opc': nan} step=11360




2022-04-20 16:16.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.17 [info     ] FQE_20220420161511: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017139945231692892, 'time_algorithm_update': 0.005042188939913897, 'loss': 0.5384718345924162, 'time_step': 0.005289678842249051, 'init_value': -19.42481803894043, 'ave_value': -18.85836640142355, 'soft_opc': nan} step=11715




2022-04-20 16:16.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.19 [info     ] FQE_20220420161511: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017111737963179468, 'time_algorithm_update': 0.005062220130168217, 'loss': 0.5558457480192605, 'time_step': 0.005310192914076255, 'init_value': -19.75699806213379, 'ave_value': -19.400540511453695, 'soft_opc': nan} step=12070




2022-04-20 16:16.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.21 [info     ] FQE_20220420161511: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017013146843708737, 'time_algorithm_update': 0.004651117324829102, 'loss': 0.5728334799933601, 'time_step': 0.004893821393939811, 'init_value': -20.149002075195312, 'ave_value': -19.701486839530713, 'soft_opc': nan} step=12425




2022-04-20 16:16.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.23 [info     ] FQE_20220420161511: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001711690929574026, 'time_algorithm_update': 0.005110858191906566, 'loss': 0.5975413161631621, 'time_step': 0.005359264830468406, 'init_value': -20.56929588317871, 'ave_value': -20.240588989148115, 'soft_opc': nan} step=12780




2022-04-20 16:16.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.25 [info     ] FQE_20220420161511: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00017010729077836158, 'time_algorithm_update': 0.005078318421269806, 'loss': 0.5859559422122761, 'time_step': 0.00532554438416387, 'init_value': -20.956331253051758, 'ave_value': -20.739850990606552, 'soft_opc': nan} step=13135




2022-04-20 16:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.27 [info     ] FQE_20220420161511: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00017027384798291704, 'time_algorithm_update': 0.005102573985784826, 'loss': 0.5885988623743326, 'time_step': 0.005346662225857587, 'init_value': -21.162996292114258, 'ave_value': -21.10391731020327, 'soft_opc': nan} step=13490




2022-04-20 16:16.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.29 [info     ] FQE_20220420161511: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016916973490110586, 'time_algorithm_update': 0.005059001815151161, 'loss': 0.5869537171501089, 'time_step': 0.005304902036425093, 'init_value': -21.423974990844727, 'ave_value': -21.486598861202157, 'soft_opc': nan} step=13845




2022-04-20 16:16.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.31 [info     ] FQE_20220420161511: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.0001707540431492765, 'time_algorithm_update': 0.004639378399916098, 'loss': 0.5673501993358975, 'time_step': 0.00488810807886258, 'init_value': -21.136404037475586, 'ave_value': -21.527655955378155, 'soft_opc': nan} step=14200




2022-04-20 16:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.33 [info     ] FQE_20220420161511: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017699658031194982, 'time_algorithm_update': 0.005045510681582169, 'loss': 0.5854840867903451, 'time_step': 0.005298163857258542, 'init_value': -20.92441177368164, 'ave_value': -21.389408287454632, 'soft_opc': nan} step=14555




2022-04-20 16:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.35 [info     ] FQE_20220420161511: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016874931227992958, 'time_algorithm_update': 0.005065649999699122, 'loss': 0.585734982358318, 'time_step': 0.005310605949079486, 'init_value': -20.8950138092041, 'ave_value': -21.65156985280827, 'soft_opc': nan} step=14910




2022-04-20 16:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.37 [info     ] FQE_20220420161511: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00017008982913594852, 'time_algorithm_update': 0.005023376706620337, 'loss': 0.5756635150491771, 'time_step': 0.005270113743526835, 'init_value': -20.901321411132812, 'ave_value': -21.65137894028172, 'soft_opc': nan} step=15265




2022-04-20 16:16.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.39 [info     ] FQE_20220420161511: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00016700046163209726, 'time_algorithm_update': 0.004635196336558167, 'loss': 0.5792384664619893, 'time_step': 0.00487571635716398, 'init_value': -20.986862182617188, 'ave_value': -21.960331887566753, 'soft_opc': nan} step=15620




2022-04-20 16:16.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.41 [info     ] FQE_20220420161511: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017172316430320202, 'time_algorithm_update': 0.005087674503595057, 'loss': 0.5796094360118601, 'time_step': 0.005336538502867793, 'init_value': -20.78382682800293, 'ave_value': -21.832296969447203, 'soft_opc': nan} step=15975




2022-04-20 16:16.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.43 [info     ] FQE_20220420161511: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017285279824700154, 'time_algorithm_update': 0.005127064610870791, 'loss': 0.5651489983666951, 'time_step': 0.005376101883364395, 'init_value': -20.601261138916016, 'ave_value': -22.08298089634651, 'soft_opc': nan} step=16330




2022-04-20 16:16.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.45 [info     ] FQE_20220420161511: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017433367984395632, 'time_algorithm_update': 0.005132239973041373, 'loss': 0.550991735355535, 'time_step': 0.005383122806817713, 'init_value': -20.648998260498047, 'ave_value': -22.543279442854676, 'soft_opc': nan} step=16685




2022-04-20 16:16.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.47 [info     ] FQE_20220420161511: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.0001538525164966852, 'time_algorithm_update': 0.004739687476359623, 'loss': 0.5455083538187852, 'time_step': 0.004961765316170706, 'init_value': -20.665130615234375, 'ave_value': -22.940728399007938, 'soft_opc': nan} step=17040




2022-04-20 16:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.49 [info     ] FQE_20220420161511: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00015142467659963688, 'time_algorithm_update': 0.004793596267700195, 'loss': 0.5316130028749017, 'time_step': 0.005012070293157873, 'init_value': -20.273067474365234, 'ave_value': -22.793622430902037, 'soft_opc': nan} step=17395




2022-04-20 16:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:16.51 [info     ] FQE_20220420161511: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.0001547732823331591, 'time_algorithm_update': 0.004966043418561909, 'loss': 0.5139411152510995, 'time_step': 0.005193326842616981, 'init_value': -20.206697463989258, 'ave_value': -23.067737447064815, 'soft_opc': nan} step=17750




2022-04-20 16:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161511/model_17750.pt
search iteration:  10
using hyper params:  [5.612365880321054e-05, 0.008246078513163432, 4.711500477678406e-05, 5]
2022-04-20 16:16.51 [debug    ] RoundIterator is selected.
2022-04-20 16:16.51 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420161651
2022-04-20 16:16.51 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:16.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:16.51 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:16.51 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 5.6123658803210

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.55 [info     ] TD3PlusBC_20220420161651: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003787690435933788, 'time_algorithm_update': 0.008994039039165652, 'critic_loss': 5.5604117887410505, 'actor_loss': 2.7707699362994633, 'time_step': 0.009451605423152099, 'td_error': 0.8769406127715598, 'init_value': -7.757528781890869, 'ave_value': -4.783159706427648} step=342
2022-04-20 16:16.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.58 [info     ] TD3PlusBC_20220420161651: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000377674548946626, 'time_algorithm_update': 0.008449175204449927, 'critic_loss': 3.4342454291923703, 'actor_loss': 2.6413151562562462, 'time_step': 0.008904012323122972, 'td_error': 1.0141379585095667, 'init_value': -11.271905899047852, 'ave_value': -6.9878897689871895} step=684
2022-04-20 16:16.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.02 [info     ] TD3PlusBC_20220420161651: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00037298913587603655, 'time_algorithm_update': 0.009029176500108507, 'critic_loss': 5.375135222025085, 'actor_loss': 2.6002393800612777, 'time_step': 0.009480995044373629, 'td_error': 1.2291718444129087, 'init_value': -15.019732475280762, 'ave_value': -9.275177893595904} step=1026
2022-04-20 16:17.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.06 [info     ] TD3PlusBC_20220420161651: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003755517870362042, 'time_algorithm_update': 0.008544122963620905, 'critic_loss': 7.524085249120032, 'actor_loss': 2.578630498975341, 'time_step': 0.009001133037589447, 'td_error': 1.4696535898926517, 'init_value': -18.499624252319336, 'ave_value': -11.472548599738905} step=1368
2022-04-20 16:17.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.09 [info     ] TD3PlusBC_20220420161651: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00038091062802320336, 'time_algorithm_update': 0.008913091748778582, 'critic_loss': 10.04703598621993, 'actor_loss': 2.5669916763640286, 'time_step': 0.009369982613457574, 'td_error': 1.8212583231386168, 'init_value': -22.37273597717285, 'ave_value': -13.847289520512495} step=1710
2022-04-20 16:17.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.13 [info     ] TD3PlusBC_20220420161651: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00037430252945213987, 'time_algorithm_update': 0.008890801005893283, 'critic_loss': 12.832470700057627, 'actor_loss': 2.5581762525770397, 'time_step': 0.009336748318365442, 'td_error': 2.066910338109524, 'init_value': -26.17471694946289, 'ave_value': -16.261375970403236} step=2052
2022-04-20 16:17.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.17 [info     ] TD3PlusBC_20220420161651: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00038226166663811223, 'time_algorithm_update': 0.008649870666146975, 'critic_loss': 15.884263886345757, 'actor_loss': 2.5524492765727795, 'time_step': 0.009105024979128474, 'td_error': 2.47157043750685, 'init_value': -29.731891632080078, 'ave_value': -18.24474403651767} step=2394
2022-04-20 16:17.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.21 [info     ] TD3PlusBC_20220420161651: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037556503251282095, 'time_algorithm_update': 0.008883797634414763, 'critic_loss': 19.143983127080904, 'actor_loss': 2.548889353958487, 'time_step': 0.00933360216910379, 'td_error': 2.652096176634654, 'init_value': -32.85274124145508, 'ave_value': -20.203628983459108} step=2736
2022-04-20 16:17.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.24 [info     ] TD3PlusBC_20220420161651: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000378543870490894, 'time_algorithm_update': 0.008529650537591232, 'critic_loss': 22.553761811284293, 'actor_loss': 2.5442669182492974, 'time_step': 0.008978483969705147, 'td_error': 2.968761096562115, 'init_value': -35.59661102294922, 'ave_value': -22.098374666130205} step=3078
2022-04-20 16:17.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.28 [info     ] TD3PlusBC_20220420161651: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00037875300959536905, 'time_algorithm_update': 0.00896338273210135, 'critic_loss': 26.34394517140082, 'actor_loss': 2.540527004944651, 'time_step': 0.009417271753500777, 'td_error': 3.4844492764420907, 'init_value': -38.9305419921875, 'ave_value': -24.364926909146966} step=3420
2022-04-20 16:17.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.32 [info     ] TD3PlusBC_20220420161651: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00038061783327693826, 'time_algorithm_update': 0.008998559929474055, 'critic_loss': 30.45968222478677, 'actor_loss': 2.537459044428597, 'time_step': 0.009452324164541144, 'td_error': 3.9175925307034922, 'init_value': -42.85271453857422, 'ave_value': -26.408618361529065} step=3762
2022-04-20 16:17.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.35 [info     ] TD3PlusBC_20220420161651: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003807712019535533, 'time_algorithm_update': 0.00854837615587558, 'critic_loss': 34.24834766164858, 'actor_loss': 2.537964624271058, 'time_step': 0.009001157437151635, 'td_error': 4.19445854340657, 'init_value': -44.37979507446289, 'ave_value': -27.933534854824092} step=4104
2022-04-20 16:17.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.39 [info     ] TD3PlusBC_20220420161651: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00038116229207892164, 'time_algorithm_update': 0.008870578648751242, 'critic_loss': 38.49802397008528, 'actor_loss': 2.5351613000122426, 'time_step': 0.009329648045768515, 'td_error': 4.6416973506516355, 'init_value': -47.40106201171875, 'ave_value': -29.775158585885723} step=4446
2022-04-20 16:17.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.43 [info     ] TD3PlusBC_20220420161651: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00038285074178238363, 'time_algorithm_update': 0.008652296679758887, 'critic_loss': 42.91254803730033, 'actor_loss': 2.5338208800867985, 'time_step': 0.009108236658642863, 'td_error': 5.104118037793222, 'init_value': -49.89778518676758, 'ave_value': -31.373188326797273} step=4788
2022-04-20 16:17.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.46 [info     ] TD3PlusBC_20220420161651: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003776090186938905, 'time_algorithm_update': 0.009009479779249046, 'critic_loss': 47.24693991566262, 'actor_loss': 2.532148015429402, 'time_step': 0.009462659819084302, 'td_error': 5.4664430855366195, 'init_value': -52.13726806640625, 'ave_value': -32.907171111104105} step=5130
2022-04-20 16:17.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.50 [info     ] TD3PlusBC_20220420161651: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003809322390639991, 'time_algorithm_update': 0.008876518199318334, 'critic_loss': 51.627397498192146, 'actor_loss': 2.5332889417458695, 'time_step': 0.009333800851253042, 'td_error': 5.969323011302226, 'init_value': -55.4093017578125, 'ave_value': -34.75195737671577} step=5472
2022-04-20 16:17.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.54 [info     ] TD3PlusBC_20220420161651: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003782259790520919, 'time_algorithm_update': 0.00866366548147815, 'critic_loss': 56.05909724542272, 'actor_loss': 2.532443290565446, 'time_step': 0.00912194293841981, 'td_error': 6.25875934561408, 'init_value': -56.67314529418945, 'ave_value': -36.21019667715237} step=5814
2022-04-20 16:17.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.57 [info     ] TD3PlusBC_20220420161651: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00037822249340035063, 'time_algorithm_update': 0.008916647810685007, 'critic_loss': 60.911324573539154, 'actor_loss': 2.528788784094024, 'time_step': 0.009373356724343104, 'td_error': 6.644032195252188, 'init_value': -58.342811584472656, 'ave_value': -37.667004240707634} step=6156
2022-04-20 16:17.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.01 [info     ] TD3PlusBC_20220420161651: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00038063107875355504, 'time_algorithm_update': 0.00883333655128702, 'critic_loss': 64.80948626088818, 'actor_loss': 2.529293930321409, 'time_step': 0.009293766746744078, 'td_error': 6.816776606729199, 'init_value': -59.84667205810547, 'ave_value': -38.928106309533} step=6498
2022-04-20 16:18.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.05 [info     ] TD3PlusBC_20220420161651: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00038131356936449195, 'time_algorithm_update': 0.009003301112972505, 'critic_loss': 69.33728311772933, 'actor_loss': 2.5290672346862437, 'time_step': 0.00946422627097682, 'td_error': 7.509041689813933, 'init_value': -62.91961669921875, 'ave_value': -39.943606143070404} step=6840
2022-04-20 16:18.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.08 [info     ] TD3PlusBC_20220420161651: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00038830090684500356, 'time_algorithm_update': 0.00888337238490233, 'critic_loss': 73.10754443609227, 'actor_loss': 2.5299844909132574, 'time_step': 0.009347990939491674, 'td_error': 7.320054021773266, 'init_value': -62.7027702331543, 'ave_value': -41.02235841045869} step=7182
2022-04-20 16:18.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.12 [info     ] TD3PlusBC_20220420161651: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037693349938643604, 'time_algorithm_update': 0.008490160194753904, 'critic_loss': 77.09581388506973, 'actor_loss': 2.5270990586420248, 'time_step': 0.008946308615612008, 'td_error': 8.083620620995525, 'init_value': -65.21180725097656, 'ave_value': -42.363110217231046} step=7524
2022-04-20 16:18.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.16 [info     ] TD3PlusBC_20220420161651: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00038169699105602956, 'time_algorithm_update': 0.008926243809928671, 'critic_loss': 81.36307949490018, 'actor_loss': 2.528068871525993, 'time_step': 0.00938791629166631, 'td_error': 8.517497552430827, 'init_value': -68.60077667236328, 'ave_value': -44.14909708261759} step=7866
2022-04-20 16:18.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.19 [info     ] TD3PlusBC_20220420161651: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003435772064833613, 'time_algorithm_update': 0.008502007925022415, 'critic_loss': 85.16638047513906, 'actor_loss': 2.5273905065324573, 'time_step': 0.008915386701885023, 'td_error': 8.717817994568048, 'init_value': -69.10618591308594, 'ave_value': -44.80566349758221} step=8208
2022-04-20 16:18.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.22 [info     ] TD3PlusBC_20220420161651: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00033254023880986445, 'time_algorithm_update': 0.0061065802100109074, 'critic_loss': 89.22708033957677, 'actor_loss': 2.5288404958289967, 'time_step': 0.0065014236851742395, 'td_error': 9.01538297301015, 'init_value': -70.52449798583984, 'ave_value': -45.99200127211693} step=8550
2022-04-20 16:18.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.25 [info     ] TD3PlusBC_20220420161651: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003724397971616154, 'time_algorithm_update': 0.0067981746461656355, 'critic_loss': 92.34974817644085, 'actor_loss': 2.528097360454805, 'time_step': 0.007246700643795972, 'td_error': 9.587410447047299, 'init_value': -72.80475616455078, 'ave_value': -47.226185398447235} step=8892
2022-04-20 16:18.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.28 [info     ] TD3PlusBC_20220420161651: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00037319478932877035, 'time_algorithm_update': 0.0068867255372610705, 'critic_loss': 96.17451308624089, 'actor_loss': 2.5279050988760607, 'time_step': 0.007337819065964013, 'td_error': 9.646936897995044, 'init_value': -71.36363220214844, 'ave_value': -47.73700909655382} step=9234
2022-04-20 16:18.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.30 [info     ] TD3PlusBC_20220420161651: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003753558934083459, 'time_algorithm_update': 0.006871377515513995, 'critic_loss': 99.2496037957264, 'actor_loss': 2.527608485249748, 'time_step': 0.00732429055442587, 'td_error': 10.177341047233327, 'init_value': -73.40925598144531, 'ave_value': -48.76343638358604} step=9576
2022-04-20 16:18.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.33 [info     ] TD3PlusBC_20220420161651: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00037668183533071776, 'time_algorithm_update': 0.006854053129229629, 'critic_loss': 102.66487793057982, 'actor_loss': 2.527856421052364, 'time_step': 0.007296850109658046, 'td_error': 10.037224030487204, 'init_value': -74.43546295166016, 'ave_value': -49.48945433241768} step=9918
2022-04-20 16:18.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.36 [info     ] TD3PlusBC_20220420161651: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003745555877685547, 'time_algorithm_update': 0.006870034145332916, 'critic_loss': 106.14447387338382, 'actor_loss': 2.5270865925571373, 'time_step': 0.007304917302047997, 'td_error': 10.364261153476962, 'init_value': -75.0150375366211, 'ave_value': -50.4807776893922} step=10260
2022-04-20 16:18.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.39 [info     ] TD3PlusBC_20220420161651: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037479679486904926, 'time_algorithm_update': 0.00687349470038163, 'critic_loss': 109.01552151239406, 'actor_loss': 2.5290331254925644, 'time_step': 0.007313450177510579, 'td_error': 10.998862577009124, 'init_value': -76.63690185546875, 'ave_value': -51.51655588898044} step=10602
2022-04-20 16:18.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.42 [info     ] TD3PlusBC_20220420161651: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003761799014799776, 'time_algorithm_update': 0.006895664142586334, 'critic_loss': 112.12008832072654, 'actor_loss': 2.528189670272738, 'time_step': 0.0073342023537172904, 'td_error': 11.12040368894432, 'init_value': -78.0699462890625, 'ave_value': -52.30647674900874} step=10944
2022-04-20 16:18.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.45 [info     ] TD3PlusBC_20220420161651: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003786233433505945, 'time_algorithm_update': 0.006903475488138478, 'critic_loss': 115.37142183627302, 'actor_loss': 2.5280016151785154, 'time_step': 0.007342596500240571, 'td_error': 10.98715707388124, 'init_value': -77.08387756347656, 'ave_value': -52.703642296920876} step=11286
2022-04-20 16:18.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.48 [info     ] TD3PlusBC_20220420161651: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003776905829446358, 'time_algorithm_update': 0.006824396507084718, 'critic_loss': 117.53178844117282, 'actor_loss': 2.5297704384340878, 'time_step': 0.007265056782995748, 'td_error': 11.70806850195746, 'init_value': -79.29904174804688, 'ave_value': -53.6357839361887} step=11628
2022-04-20 16:18.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.51 [info     ] TD3PlusBC_20220420161651: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037953658410680224, 'time_algorithm_update': 0.00686458676879169, 'critic_loss': 120.44798883360032, 'actor_loss': 2.5305728215223167, 'time_step': 0.007307948424802189, 'td_error': 11.310168211176823, 'init_value': -78.47065734863281, 'ave_value': -54.175401718234056} step=11970
2022-04-20 16:18.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.54 [info     ] TD3PlusBC_20220420161651: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00037144499215466237, 'time_algorithm_update': 0.006838341902571115, 'critic_loss': 122.93673721670407, 'actor_loss': 2.5297384833731846, 'time_step': 0.00727482079065334, 'td_error': 11.765580753227935, 'init_value': -78.7469711303711, 'ave_value': -54.577212911339664} step=12312
2022-04-20 16:18.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.57 [info     ] TD3PlusBC_20220420161651: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00037017482066015054, 'time_algorithm_update': 0.006830186871757285, 'critic_loss': 125.37146156712582, 'actor_loss': 2.5293271973816274, 'time_step': 0.007261160521479378, 'td_error': 12.301581701122712, 'init_value': -79.42362213134766, 'ave_value': -55.46785043182221} step=12654
2022-04-20 16:18.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.59 [info     ] TD3PlusBC_20220420161651: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00037975757442719753, 'time_algorithm_update': 0.006920516142371105, 'critic_loss': 127.43775752552769, 'actor_loss': 2.5280084874894886, 'time_step': 0.007360553880881148, 'td_error': 12.877215979097981, 'init_value': -80.62974548339844, 'ave_value': -56.08383785048164} step=12996
2022-04-20 16:18.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.02 [info     ] TD3PlusBC_20220420161651: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.000373873794287966, 'time_algorithm_update': 0.006648526554219207, 'critic_loss': 129.57104409647266, 'actor_loss': 2.5301564618160852, 'time_step': 0.0070841633088407465, 'td_error': 12.722423695951582, 'init_value': -82.07073211669922, 'ave_value': -56.839900569993716} step=13338
2022-04-20 16:19.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.05 [info     ] TD3PlusBC_20220420161651: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037469292244716, 'time_algorithm_update': 0.0068918794219256846, 'critic_loss': 131.54587244848062, 'actor_loss': 2.530532067282158, 'time_step': 0.007331049233152156, 'td_error': 12.865990215703738, 'init_value': -80.51512908935547, 'ave_value': -57.0087151219674} step=13680
2022-04-20 16:19.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.08 [info     ] TD3PlusBC_20220420161651: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00038438164002714103, 'time_algorithm_update': 0.0069375323970415435, 'critic_loss': 133.70208004064727, 'actor_loss': 2.530530997884204, 'time_step': 0.00738317018363908, 'td_error': 14.021243329512604, 'init_value': -82.90187072753906, 'ave_value': -57.798434133671456} step=14022
2022-04-20 16:19.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.11 [info     ] TD3PlusBC_20220420161651: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003778767167476186, 'time_algorithm_update': 0.0069319902107729545, 'critic_loss': 135.7264388681155, 'actor_loss': 2.5307310254950273, 'time_step': 0.007371819507308871, 'td_error': 14.509430413572593, 'init_value': -81.7910385131836, 'ave_value': -58.23470944148966} step=14364
2022-04-20 16:19.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.14 [info     ] TD3PlusBC_20220420161651: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003783479768630357, 'time_algorithm_update': 0.006886559620238187, 'critic_loss': 137.54919823986745, 'actor_loss': 2.5317289201836837, 'time_step': 0.0073320600721571184, 'td_error': 14.207043337294007, 'init_value': -81.26231384277344, 'ave_value': -58.55853846488806} step=14706
2022-04-20 16:19.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.17 [info     ] TD3PlusBC_20220420161651: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00037780282093070406, 'time_algorithm_update': 0.006919444653025845, 'critic_loss': 138.80961071259793, 'actor_loss': 2.530054434001097, 'time_step': 0.007361885399846306, 'td_error': 14.107529973673309, 'init_value': -83.63587951660156, 'ave_value': -59.52627445287852} step=15048
2022-04-20 16:19.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.20 [info     ] TD3PlusBC_20220420161651: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003815889358520508, 'time_algorithm_update': 0.006925357015509354, 'critic_loss': 140.5858283461186, 'actor_loss': 2.530977495929651, 'time_step': 0.007371173964606391, 'td_error': 13.450836168415627, 'init_value': -83.234130859375, 'ave_value': -59.70619630607147} step=15390
2022-04-20 16:19.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.23 [info     ] TD3PlusBC_20220420161651: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003799848389207271, 'time_algorithm_update': 0.0069751885899326255, 'critic_loss': 142.13530142265455, 'actor_loss': 2.531315834201567, 'time_step': 0.00741816194433915, 'td_error': 13.786035399257036, 'init_value': -81.92814636230469, 'ave_value': -59.79847837114304} step=15732
2022-04-20 16:19.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.26 [info     ] TD3PlusBC_20220420161651: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003781360492371676, 'time_algorithm_update': 0.00698534229345489, 'critic_loss': 144.05137982284813, 'actor_loss': 2.5325178667815806, 'time_step': 0.007429077611332051, 'td_error': 14.022850921708628, 'init_value': -83.8326416015625, 'ave_value': -60.631798658253174} step=16074
2022-04-20 16:19.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.29 [info     ] TD3PlusBC_20220420161651: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00037945223133466395, 'time_algorithm_update': 0.0069101400542677495, 'critic_loss': 144.98139431602075, 'actor_loss': 2.531975999910232, 'time_step': 0.007350474073175798, 'td_error': 13.558377436761443, 'init_value': -81.37635040283203, 'ave_value': -60.45255082442467} step=16416
2022-04-20 16:19.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.32 [info     ] TD3PlusBC_20220420161651: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00037970738104212354, 'time_algorithm_update': 0.006873263253106011, 'critic_loss': 145.62181633397154, 'actor_loss': 2.530372763237758, 'time_step': 0.007314889751679716, 'td_error': 13.88653963612178, 'init_value': -81.78729248046875, 'ave_value': -60.77450598094495} step=16758
2022-04-20 16:19.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:19.35 [info     ] TD3PlusBC_20220420161651: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003731752696790193, 'time_algorithm_update': 0.006926157321149146, 'critic_loss': 146.90896958914416, 'actor_loss': 2.532828004736649, 'time_step': 0.007361537531802529, 'td_error': 13.178015147256271, 'init_value': -80.74983215332031, 'ave_value': -60.872632777892086} step=17100
2022-04-20 16:19.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420161651/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:19.35 [info     ] FQE_20220420161935: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015836307801396013, 'time_algorithm_update': 0.0036807519843779415, 'loss': 0.008600974716358336, 'time_step': 0.003911679049572313, 'init_value': -0.15281717479228973, 'ave_value': -0.11014878026100698, 'soft_opc': nan} step=166




2022-04-20 16:19.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.36 [info     ] FQE_20220420161935: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001578259180827313, 'time_algorithm_update': 0.0034471893885049476, 'loss': 0.00604908499047609, 'time_step': 0.0036793487617768437, 'init_value': -0.29248571395874023, 'ave_value': -0.19272984712099372, 'soft_opc': nan} step=332




2022-04-20 16:19.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.37 [info     ] FQE_20220420161935: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001572528517389872, 'time_algorithm_update': 0.0035762499613934255, 'loss': 0.00539514301653323, 'time_step': 0.003805523895355592, 'init_value': -0.3240649700164795, 'ave_value': -0.19647769563483192, 'soft_opc': nan} step=498




2022-04-20 16:19.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.38 [info     ] FQE_20220420161935: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016001764550266495, 'time_algorithm_update': 0.0034753859761249587, 'loss': 0.00526595476969897, 'time_step': 0.0037072322454797216, 'init_value': -0.41352540254592896, 'ave_value': -0.2501382228396078, 'soft_opc': nan} step=664




2022-04-20 16:19.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.38 [info     ] FQE_20220420161935: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015898210456572384, 'time_algorithm_update': 0.003627113549106092, 'loss': 0.004900865942654929, 'time_step': 0.0038591278604714267, 'init_value': -0.47448796033859253, 'ave_value': -0.2718441818345774, 'soft_opc': nan} step=830




2022-04-20 16:19.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.39 [info     ] FQE_20220420161935: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015747690775308264, 'time_algorithm_update': 0.003457267600369741, 'loss': 0.0044868583061608925, 'time_step': 0.0036894169198461325, 'init_value': -0.47785982489585876, 'ave_value': -0.26163513087625573, 'soft_opc': nan} step=996




2022-04-20 16:19.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.40 [info     ] FQE_20220420161935: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015854835510253906, 'time_algorithm_update': 0.003575118191270943, 'loss': 0.0045236032315900164, 'time_step': 0.003806046692721815, 'init_value': -0.550516664981842, 'ave_value': -0.31336184771404274, 'soft_opc': nan} step=1162




2022-04-20 16:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.40 [info     ] FQE_20220420161935: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001587350684476186, 'time_algorithm_update': 0.003580354782472174, 'loss': 0.004191761125995587, 'time_step': 0.0038114872323461325, 'init_value': -0.6049350500106812, 'ave_value': -0.36036405211906075, 'soft_opc': nan} step=1328




2022-04-20 16:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.41 [info     ] FQE_20220420161935: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015620007572403872, 'time_algorithm_update': 0.003578614039593432, 'loss': 0.004035115863353643, 'time_step': 0.0038105536656207347, 'init_value': -0.5993213653564453, 'ave_value': -0.3423847586914964, 'soft_opc': nan} step=1494




2022-04-20 16:19.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.42 [info     ] FQE_20220420161935: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015347118837287627, 'time_algorithm_update': 0.003491792334131448, 'loss': 0.004186195035251868, 'time_step': 0.0037169154868068465, 'init_value': -0.6486661434173584, 'ave_value': -0.38354016859042417, 'soft_opc': nan} step=1660




2022-04-20 16:19.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.42 [info     ] FQE_20220420161935: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015511570206607682, 'time_algorithm_update': 0.003574789288532303, 'loss': 0.004439306611627206, 'time_step': 0.003800570246684982, 'init_value': -0.7509976625442505, 'ave_value': -0.47507458889504545, 'soft_opc': nan} step=1826




2022-04-20 16:19.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.43 [info     ] FQE_20220420161935: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015432863350374153, 'time_algorithm_update': 0.0034682693251644275, 'loss': 0.0042497126535647155, 'time_step': 0.0036958326776343657, 'init_value': -0.757663369178772, 'ave_value': -0.475371398071985, 'soft_opc': nan} step=1992




2022-04-20 16:19.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.44 [info     ] FQE_20220420161935: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015873219593461738, 'time_algorithm_update': 0.003461955541587738, 'loss': 0.004494296220630542, 'time_step': 0.0036967834794377707, 'init_value': -0.7682976722717285, 'ave_value': -0.4907175132551709, 'soft_opc': nan} step=2158




2022-04-20 16:19.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.45 [info     ] FQE_20220420161935: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015333330774881752, 'time_algorithm_update': 0.0034309324011745222, 'loss': 0.004677601433527398, 'time_step': 0.003656181944421975, 'init_value': -0.8092131614685059, 'ave_value': -0.5172058998951101, 'soft_opc': nan} step=2324




2022-04-20 16:19.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.45 [info     ] FQE_20220420161935: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015455268951783697, 'time_algorithm_update': 0.0036193577640027887, 'loss': 0.0048371563544244425, 'time_step': 0.0038464816219835394, 'init_value': -0.922299325466156, 'ave_value': -0.6135787572678145, 'soft_opc': nan} step=2490




2022-04-20 16:19.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.46 [info     ] FQE_20220420161935: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016039394470582525, 'time_algorithm_update': 0.003577679036611534, 'loss': 0.005359697019602789, 'time_step': 0.0038100926272840387, 'init_value': -0.9819707870483398, 'ave_value': -0.6575862608145218, 'soft_opc': nan} step=2656




2022-04-20 16:19.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.47 [info     ] FQE_20220420161935: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015430421714323112, 'time_algorithm_update': 0.0036105290952935278, 'loss': 0.005873283702341548, 'time_step': 0.0038373944271041685, 'init_value': -1.0565497875213623, 'ave_value': -0.7315489432631849, 'soft_opc': nan} step=2822




2022-04-20 16:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.47 [info     ] FQE_20220420161935: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015754584806511202, 'time_algorithm_update': 0.003528939672263272, 'loss': 0.0060882004537529885, 'time_step': 0.003756824746189347, 'init_value': -1.0841761827468872, 'ave_value': -0.7560384417137316, 'soft_opc': nan} step=2988




2022-04-20 16:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.48 [info     ] FQE_20220420161935: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015733471835952206, 'time_algorithm_update': 0.003574214785932058, 'loss': 0.006424801678365904, 'time_step': 0.003803727138473327, 'init_value': -1.1771254539489746, 'ave_value': -0.8207658473234456, 'soft_opc': nan} step=3154




2022-04-20 16:19.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.49 [info     ] FQE_20220420161935: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016790125743452325, 'time_algorithm_update': 0.003528639494654644, 'loss': 0.007263584527066426, 'time_step': 0.0037722515772624188, 'init_value': -1.2025445699691772, 'ave_value': -0.8358013344039251, 'soft_opc': nan} step=3320




2022-04-20 16:19.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.49 [info     ] FQE_20220420161935: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001583343528839479, 'time_algorithm_update': 0.0035590191921555854, 'loss': 0.007621018017546824, 'time_step': 0.0037909071129488656, 'init_value': -1.3139169216156006, 'ave_value': -0.9336663522900225, 'soft_opc': nan} step=3486




2022-04-20 16:19.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.50 [info     ] FQE_20220420161935: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015374982213399498, 'time_algorithm_update': 0.003467600029635142, 'loss': 0.008539969879987997, 'time_step': 0.0036938161735075065, 'init_value': -1.3782479763031006, 'ave_value': -0.9845305868634232, 'soft_opc': nan} step=3652




2022-04-20 16:19.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.51 [info     ] FQE_20220420161935: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016264168612928275, 'time_algorithm_update': 0.003736238881766078, 'loss': 0.008857008642713394, 'time_step': 0.003975240581006889, 'init_value': -1.4898046255111694, 'ave_value': -1.0549162560620824, 'soft_opc': nan} step=3818




2022-04-20 16:19.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.52 [info     ] FQE_20220420161935: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015965714512101138, 'time_algorithm_update': 0.003427140684012907, 'loss': 0.009449399568362117, 'time_step': 0.00365915068660874, 'init_value': -1.5016330480575562, 'ave_value': -1.0532733941467496, 'soft_opc': nan} step=3984




2022-04-20 16:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.52 [info     ] FQE_20220420161935: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015697278172136788, 'time_algorithm_update': 0.0035620741097323865, 'loss': 0.010131096202285444, 'time_step': 0.003791177129170981, 'init_value': -1.5561721324920654, 'ave_value': -1.0847990212777445, 'soft_opc': nan} step=4150




2022-04-20 16:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.53 [info     ] FQE_20220420161935: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015590851565441453, 'time_algorithm_update': 0.003430263105645237, 'loss': 0.010612105486454465, 'time_step': 0.00365678086338273, 'init_value': -1.617397665977478, 'ave_value': -1.1241016665389678, 'soft_opc': nan} step=4316




2022-04-20 16:19.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.54 [info     ] FQE_20220420161935: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001597217766635389, 'time_algorithm_update': 0.0035459162241005034, 'loss': 0.010792622442991215, 'time_step': 0.0037783140159515015, 'init_value': -1.6278823614120483, 'ave_value': -1.11378993538653, 'soft_opc': nan} step=4482




2022-04-20 16:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.54 [info     ] FQE_20220420161935: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016132894768772354, 'time_algorithm_update': 0.003541455211409603, 'loss': 0.011268775432555745, 'time_step': 0.0037747276834694736, 'init_value': -1.7809014320373535, 'ave_value': -1.225398541006956, 'soft_opc': nan} step=4648




2022-04-20 16:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.55 [info     ] FQE_20220420161935: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015876379357763083, 'time_algorithm_update': 0.0036337950143469386, 'loss': 0.012223816260762781, 'time_step': 0.0038644175931631802, 'init_value': -1.8354253768920898, 'ave_value': -1.2552501046107158, 'soft_opc': nan} step=4814




2022-04-20 16:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.56 [info     ] FQE_20220420161935: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001605447516383895, 'time_algorithm_update': 0.0036238503743367023, 'loss': 0.012767358138843114, 'time_step': 0.0038566273379038616, 'init_value': -1.8506011962890625, 'ave_value': -1.2716705187618196, 'soft_opc': nan} step=4980




2022-04-20 16:19.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.56 [info     ] FQE_20220420161935: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001562331096235528, 'time_algorithm_update': 0.00362839899867414, 'loss': 0.013089454352732253, 'time_step': 0.003856714949550399, 'init_value': -1.95174241065979, 'ave_value': -1.3080450747464154, 'soft_opc': nan} step=5146




2022-04-20 16:19.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.57 [info     ] FQE_20220420161935: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016190919531397074, 'time_algorithm_update': 0.003446281674396561, 'loss': 0.013403202947060835, 'time_step': 0.0036760927682899566, 'init_value': -2.0539658069610596, 'ave_value': -1.3943858160628928, 'soft_opc': nan} step=5312




2022-04-20 16:19.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.58 [info     ] FQE_20220420161935: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015774405146219643, 'time_algorithm_update': 0.0035753695361585504, 'loss': 0.014559918082549226, 'time_step': 0.003803398235734687, 'init_value': -2.1047418117523193, 'ave_value': -1.4047101033230622, 'soft_opc': nan} step=5478




2022-04-20 16:19.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.59 [info     ] FQE_20220420161935: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015647152820265437, 'time_algorithm_update': 0.0035722571683217243, 'loss': 0.01520823416894818, 'time_step': 0.003806051001491317, 'init_value': -2.0990681648254395, 'ave_value': -1.3969272172162392, 'soft_opc': nan} step=5644




2022-04-20 16:19.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.59 [info     ] FQE_20220420161935: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015762771468564687, 'time_algorithm_update': 0.0035135932715542346, 'loss': 0.015837682216815335, 'time_step': 0.0037463056035788663, 'init_value': -2.1803581714630127, 'ave_value': -1.448650600096664, 'soft_opc': nan} step=5810




2022-04-20 16:19.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.00 [info     ] FQE_20220420161935: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016312857708299016, 'time_algorithm_update': 0.003485926662582949, 'loss': 0.01683763903986483, 'time_step': 0.0037191431206392953, 'init_value': -2.2977991104125977, 'ave_value': -1.5320223496438146, 'soft_opc': nan} step=5976




2022-04-20 16:20.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.01 [info     ] FQE_20220420161935: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015877671988613634, 'time_algorithm_update': 0.003620058657175087, 'loss': 0.017204380769518215, 'time_step': 0.0038527609354042144, 'init_value': -2.3907899856567383, 'ave_value': -1.590672932612198, 'soft_opc': nan} step=6142




2022-04-20 16:20.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.01 [info     ] FQE_20220420161935: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001571681126054511, 'time_algorithm_update': 0.0035239170832806325, 'loss': 0.017979151090320916, 'time_step': 0.003748415464378265, 'init_value': -2.468212842941284, 'ave_value': -1.6434022083677151, 'soft_opc': nan} step=6308




2022-04-20 16:20.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.02 [info     ] FQE_20220420161935: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015475376542792264, 'time_algorithm_update': 0.003606715834284403, 'loss': 0.018441454196034605, 'time_step': 0.0038310921335794838, 'init_value': -2.527885675430298, 'ave_value': -1.6683570244693542, 'soft_opc': nan} step=6474




2022-04-20 16:20.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.03 [info     ] FQE_20220420161935: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001589763595397214, 'time_algorithm_update': 0.003605033977922187, 'loss': 0.019588098915978278, 'time_step': 0.003837278090327619, 'init_value': -2.6136717796325684, 'ave_value': -1.7397402716582422, 'soft_opc': nan} step=6640




2022-04-20 16:20.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.03 [info     ] FQE_20220420161935: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016520253146987363, 'time_algorithm_update': 0.0035815655467021897, 'loss': 0.020083107557063586, 'time_step': 0.003820531339530485, 'init_value': -2.7306008338928223, 'ave_value': -1.8190226502783664, 'soft_opc': nan} step=6806




2022-04-20 16:20.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.04 [info     ] FQE_20220420161935: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015637529901711336, 'time_algorithm_update': 0.003558835351323507, 'loss': 0.021412585953850836, 'time_step': 0.0037881897156497083, 'init_value': -2.745453357696533, 'ave_value': -1.8112366017189112, 'soft_opc': nan} step=6972




2022-04-20 16:20.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.05 [info     ] FQE_20220420161935: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015678606837628837, 'time_algorithm_update': 0.003592238368758236, 'loss': 0.022243554052386254, 'time_step': 0.003821671727191971, 'init_value': -2.8509716987609863, 'ave_value': -1.8895845730293979, 'soft_opc': nan} step=7138




2022-04-20 16:20.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.06 [info     ] FQE_20220420161935: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015863022172307395, 'time_algorithm_update': 0.003522515296936035, 'loss': 0.022647765152302223, 'time_step': 0.003754186342997723, 'init_value': -2.9375152587890625, 'ave_value': -1.9640818819873505, 'soft_opc': nan} step=7304




2022-04-20 16:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.06 [info     ] FQE_20220420161935: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015711066234542663, 'time_algorithm_update': 0.003612018493284662, 'loss': 0.02289425051513027, 'time_step': 0.0038406920720295734, 'init_value': -2.9522886276245117, 'ave_value': -1.96654296153569, 'soft_opc': nan} step=7470




2022-04-20 16:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.07 [info     ] FQE_20220420161935: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001619206853659756, 'time_algorithm_update': 0.0035662880863051817, 'loss': 0.022923630638418336, 'time_step': 0.0038007196173610457, 'init_value': -2.942196846008301, 'ave_value': -1.9609022657747741, 'soft_opc': nan} step=7636




2022-04-20 16:20.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.08 [info     ] FQE_20220420161935: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001584090382219797, 'time_algorithm_update': 0.003578308116958802, 'loss': 0.02212524267771921, 'time_step': 0.0038098082484969175, 'init_value': -3.050569772720337, 'ave_value': -2.0409457680811216, 'soft_opc': nan} step=7802




2022-04-20 16:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.08 [info     ] FQE_20220420161935: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015884853271116693, 'time_algorithm_update': 0.0035367873777826147, 'loss': 0.023800131742820054, 'time_step': 0.0037658803434257046, 'init_value': -3.1558923721313477, 'ave_value': -2.1129961880074966, 'soft_opc': nan} step=7968




2022-04-20 16:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.09 [info     ] FQE_20220420161935: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016146108328577983, 'time_algorithm_update': 0.003592346087995782, 'loss': 0.023583192871995718, 'time_step': 0.003827465585915439, 'init_value': -3.0493202209472656, 'ave_value': -2.0153708583331325, 'soft_opc': nan} step=8134




2022-04-20 16:20.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:20.10 [info     ] FQE_20220420161935: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016098999115357916, 'time_algorithm_update': 0.0036641804568738824, 'loss': 0.02428384588158925, 'time_step': 0.0038946464837315573, 'init_value': -3.230201482772827, 'ave_value': -2.126485263448846, 'soft_opc': nan} step=8300




2022-04-20 16:20.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161935/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:20.10 [debug    ] RoundIterator is selected.
2022-04-20 16:20.10 [info     ] Directory is created at d3rlpy_logs/FQE_20220420162010
2022-04-20 16:20.10 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:20.10 [debug    ] Building models...
2022-04-20 16:20.10 [debug    ] Models have been built.
2022-04-20 16:20.10 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420162010/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:20.12 [info     ] FQE_20220420162010: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016150710194609886, 'time_algorithm_update': 0.0035181198009224825, 'loss': 0.03042231098109813, 'time_step': 0.0037528803182202714, 'init_value': -0.8612945675849915, 'ave_value': -0.8565240229974996, 'soft_opc': nan} step=344




2022-04-20 16:20.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.13 [info     ] FQE_20220420162010: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015995183656382006, 'time_algorithm_update': 0.0035834922346957895, 'loss': 0.025924679953171763, 'time_step': 0.0038147506325743917, 'init_value': -1.75844144821167, 'ave_value': -1.7393892824515567, 'soft_opc': nan} step=688




2022-04-20 16:20.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.15 [info     ] FQE_20220420162010: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016302771346513614, 'time_algorithm_update': 0.0036190727422403734, 'loss': 0.029368504898102825, 'time_step': 0.0038551071355509203, 'init_value': -2.8555920124053955, 'ave_value': -2.8327173631201994, 'soft_opc': nan} step=1032




2022-04-20 16:20.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.16 [info     ] FQE_20220420162010: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016432723333669263, 'time_algorithm_update': 0.003807794216067292, 'loss': 0.03322864109704401, 'time_step': 0.00404641586680745, 'init_value': -3.818870782852173, 'ave_value': -3.740750365274715, 'soft_opc': nan} step=1376




2022-04-20 16:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.18 [info     ] FQE_20220420162010: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017001462537188862, 'time_algorithm_update': 0.005019081886424575, 'loss': 0.04259855628046099, 'time_step': 0.005263295284537382, 'init_value': -5.102663993835449, 'ave_value': -4.921795079461089, 'soft_opc': nan} step=1720




2022-04-20 16:20.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.20 [info     ] FQE_20220420162010: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016891124636627907, 'time_algorithm_update': 0.005121141672134399, 'loss': 0.053262811668520406, 'time_step': 0.005366043989048447, 'init_value': -6.006372928619385, 'ave_value': -5.7103749528393015, 'soft_opc': nan} step=2064




2022-04-20 16:20.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.22 [info     ] FQE_20220420162010: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017154771228169286, 'time_algorithm_update': 0.005023682533308517, 'loss': 0.06606172490323525, 'time_step': 0.005272073801173721, 'init_value': -7.2353386878967285, 'ave_value': -6.837293540317196, 'soft_opc': nan} step=2408




2022-04-20 16:20.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.24 [info     ] FQE_20220420162010: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.000169650066730588, 'time_algorithm_update': 0.005120013342347256, 'loss': 0.08295699583669734, 'time_step': 0.005364798529203548, 'init_value': -8.205254554748535, 'ave_value': -7.606699492883038, 'soft_opc': nan} step=2752




2022-04-20 16:20.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.26 [info     ] FQE_20220420162010: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016817727754282397, 'time_algorithm_update': 0.004707759895990061, 'loss': 0.09811796723366824, 'time_step': 0.004954010941261469, 'init_value': -9.141510009765625, 'ave_value': -8.45018172443987, 'soft_opc': nan} step=3096




2022-04-20 16:20.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.28 [info     ] FQE_20220420162010: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017049215560735657, 'time_algorithm_update': 0.005108301722726157, 'loss': 0.12090611406656113, 'time_step': 0.005353134731913722, 'init_value': -10.16771411895752, 'ave_value': -9.278655305558496, 'soft_opc': nan} step=3440




2022-04-20 16:20.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.30 [info     ] FQE_20220420162010: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017086225886677587, 'time_algorithm_update': 0.005066798176876334, 'loss': 0.13696813143161668, 'time_step': 0.005316363517628159, 'init_value': -11.010629653930664, 'ave_value': -9.996361794584507, 'soft_opc': nan} step=3784




2022-04-20 16:20.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.32 [info     ] FQE_20220420162010: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.000167850838151089, 'time_algorithm_update': 0.0050210086412208026, 'loss': 0.1599042307758747, 'time_step': 0.005265941453534503, 'init_value': -12.176042556762695, 'ave_value': -10.900070513623792, 'soft_opc': nan} step=4128




2022-04-20 16:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.34 [info     ] FQE_20220420162010: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016633091970931653, 'time_algorithm_update': 0.0045773366162943285, 'loss': 0.18446944998902118, 'time_step': 0.004819806232008823, 'init_value': -13.065059661865234, 'ave_value': -11.623097621938129, 'soft_opc': nan} step=4472




2022-04-20 16:20.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.36 [info     ] FQE_20220420162010: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017001670460368312, 'time_algorithm_update': 0.005074403313703315, 'loss': 0.21519718167566976, 'time_step': 0.005318887011949406, 'init_value': -14.262383460998535, 'ave_value': -12.722356707012063, 'soft_opc': nan} step=4816




2022-04-20 16:20.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.38 [info     ] FQE_20220420162010: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016894936561584473, 'time_algorithm_update': 0.005036881496739942, 'loss': 0.23834454490218399, 'time_step': 0.005283820074658061, 'init_value': -14.926864624023438, 'ave_value': -13.309265845725397, 'soft_opc': nan} step=5160




2022-04-20 16:20.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.40 [info     ] FQE_20220420162010: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001655366531638212, 'time_algorithm_update': 0.0049988870010819544, 'loss': 0.2614338015596014, 'time_step': 0.005241473746854205, 'init_value': -15.935707092285156, 'ave_value': -14.244066134343544, 'soft_opc': nan} step=5504




2022-04-20 16:20.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.42 [info     ] FQE_20220420162010: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016948788665061774, 'time_algorithm_update': 0.005049665306889733, 'loss': 0.28918266120951536, 'time_step': 0.005296051502227783, 'init_value': -16.681072235107422, 'ave_value': -14.97439989517401, 'soft_opc': nan} step=5848




2022-04-20 16:20.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.43 [info     ] FQE_20220420162010: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001662318096604458, 'time_algorithm_update': 0.004554982102194498, 'loss': 0.31251372732113786, 'time_step': 0.004795938730239868, 'init_value': -17.049610137939453, 'ave_value': -15.300453534843149, 'soft_opc': nan} step=6192




2022-04-20 16:20.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.45 [info     ] FQE_20220420162010: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001697200675343358, 'time_algorithm_update': 0.00511406327402869, 'loss': 0.33375980387723375, 'time_step': 0.005360227684641994, 'init_value': -17.68592071533203, 'ave_value': -15.92075877524325, 'soft_opc': nan} step=6536




2022-04-20 16:20.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.47 [info     ] FQE_20220420162010: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016892510791157567, 'time_algorithm_update': 0.0051405928855718565, 'loss': 0.3460108588048972, 'time_step': 0.005384082017942916, 'init_value': -17.9786376953125, 'ave_value': -16.259790810862942, 'soft_opc': nan} step=6880




2022-04-20 16:20.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.49 [info     ] FQE_20220420162010: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017091909120249194, 'time_algorithm_update': 0.005073373400887778, 'loss': 0.3670923059513836, 'time_step': 0.005320878916008528, 'init_value': -18.428138732910156, 'ave_value': -16.76383069591211, 'soft_opc': nan} step=7224




2022-04-20 16:20.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.51 [info     ] FQE_20220420162010: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016613200653431027, 'time_algorithm_update': 0.0045989862708158274, 'loss': 0.38911572717628334, 'time_step': 0.004840573599172193, 'init_value': -18.8692684173584, 'ave_value': -17.180733450503624, 'soft_opc': nan} step=7568




2022-04-20 16:20.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.53 [info     ] FQE_20220420162010: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017115473747253418, 'time_algorithm_update': 0.0051109873971273735, 'loss': 0.4073673624946021, 'time_step': 0.005356417145839957, 'init_value': -19.257465362548828, 'ave_value': -17.565870033577294, 'soft_opc': nan} step=7912




2022-04-20 16:20.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.55 [info     ] FQE_20220420162010: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016707597776900892, 'time_algorithm_update': 0.005081250917079837, 'loss': 0.42631534350559463, 'time_step': 0.005325200252754744, 'init_value': -19.442089080810547, 'ave_value': -17.76844749135607, 'soft_opc': nan} step=8256




2022-04-20 16:20.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.57 [info     ] FQE_20220420162010: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017079918883567633, 'time_algorithm_update': 0.004997974911401438, 'loss': 0.44262513933638326, 'time_step': 0.005244959232419036, 'init_value': -19.695892333984375, 'ave_value': -18.121200245430877, 'soft_opc': nan} step=8600




2022-04-20 16:20.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.59 [info     ] FQE_20220420162010: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016956135284068973, 'time_algorithm_update': 0.005081209332443947, 'loss': 0.46608603160914985, 'time_step': 0.005328518706698751, 'init_value': -20.280628204345703, 'ave_value': -18.756665494931895, 'soft_opc': nan} step=8944




2022-04-20 16:20.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.01 [info     ] FQE_20220420162010: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001679180666457775, 'time_algorithm_update': 0.00463338577470114, 'loss': 0.48360961420613147, 'time_step': 0.004877828581388606, 'init_value': -20.54823112487793, 'ave_value': -18.986129296417474, 'soft_opc': nan} step=9288




2022-04-20 16:21.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.03 [info     ] FQE_20220420162010: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001690775849098383, 'time_algorithm_update': 0.005049190548963325, 'loss': 0.4959643763046019, 'time_step': 0.0052942723728889645, 'init_value': -20.94280433654785, 'ave_value': -19.38798072586457, 'soft_opc': nan} step=9632




2022-04-20 16:21.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.05 [info     ] FQE_20220420162010: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001689050086708956, 'time_algorithm_update': 0.0050297587416892825, 'loss': 0.5166746092429602, 'time_step': 0.005275225223496903, 'init_value': -21.234806060791016, 'ave_value': -19.541407822408747, 'soft_opc': nan} step=9976




2022-04-20 16:21.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.07 [info     ] FQE_20220420162010: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017064671183741369, 'time_algorithm_update': 0.005088976649350898, 'loss': 0.5238661711039239, 'time_step': 0.005335887504178424, 'init_value': -21.477407455444336, 'ave_value': -19.826370928768775, 'soft_opc': nan} step=10320




2022-04-20 16:21.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.09 [info     ] FQE_20220420162010: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016782519429229026, 'time_algorithm_update': 0.00464662701584572, 'loss': 0.5323986850819702, 'time_step': 0.004891008831733881, 'init_value': -21.767902374267578, 'ave_value': -20.213721620365305, 'soft_opc': nan} step=10664




2022-04-20 16:21.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.11 [info     ] FQE_20220420162010: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017191088476846384, 'time_algorithm_update': 0.005078884058220442, 'loss': 0.5469029246569547, 'time_step': 0.005326658487319946, 'init_value': -22.081707000732422, 'ave_value': -20.498992712321684, 'soft_opc': nan} step=11008




2022-04-20 16:21.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.13 [info     ] FQE_20220420162010: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016908312952795693, 'time_algorithm_update': 0.005104006722916004, 'loss': 0.5635324009250157, 'time_step': 0.00535022311432417, 'init_value': -22.408193588256836, 'ave_value': -20.887033903205072, 'soft_opc': nan} step=11352




2022-04-20 16:21.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.15 [info     ] FQE_20220420162010: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016999521920847338, 'time_algorithm_update': 0.005097790512927743, 'loss': 0.5680222272721314, 'time_step': 0.005343273628589718, 'init_value': -22.623254776000977, 'ave_value': -21.00691871876799, 'soft_opc': nan} step=11696




2022-04-20 16:21.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.17 [info     ] FQE_20220420162010: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017013937927955804, 'time_algorithm_update': 0.005094602357509525, 'loss': 0.5817667208124645, 'time_step': 0.005340696074241816, 'init_value': -22.982738494873047, 'ave_value': -21.238974881504856, 'soft_opc': nan} step=12040




2022-04-20 16:21.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.19 [info     ] FQE_20220420162010: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016595873721810274, 'time_algorithm_update': 0.00458034873008728, 'loss': 0.5983689711808206, 'time_step': 0.004821044761081075, 'init_value': -23.341215133666992, 'ave_value': -21.611469883858728, 'soft_opc': nan} step=12384




2022-04-20 16:21.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.21 [info     ] FQE_20220420162010: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016936105351115382, 'time_algorithm_update': 0.005053448122601176, 'loss': 0.6144563346386476, 'time_step': 0.005299826001012048, 'init_value': -23.58132553100586, 'ave_value': -21.934200313277877, 'soft_opc': nan} step=12728




2022-04-20 16:21.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.23 [info     ] FQE_20220420162010: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017199336096297864, 'time_algorithm_update': 0.005149243875991466, 'loss': 0.616182719595557, 'time_step': 0.005399069813794868, 'init_value': -23.509906768798828, 'ave_value': -21.744755773794175, 'soft_opc': nan} step=13072




2022-04-20 16:21.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.25 [info     ] FQE_20220420162010: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016769974730735602, 'time_algorithm_update': 0.005101305107737697, 'loss': 0.6167847821326536, 'time_step': 0.005346169998479444, 'init_value': -23.660945892333984, 'ave_value': -22.082784521113243, 'soft_opc': nan} step=13416




2022-04-20 16:21.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.26 [info     ] FQE_20220420162010: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016910600107769634, 'time_algorithm_update': 0.004614120305970658, 'loss': 0.6205462772875677, 'time_step': 0.00485864004423452, 'init_value': -23.740636825561523, 'ave_value': -22.17466754294294, 'soft_opc': nan} step=13760




2022-04-20 16:21.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.28 [info     ] FQE_20220420162010: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016986353452815565, 'time_algorithm_update': 0.00514822851779849, 'loss': 0.6226399461359738, 'time_step': 0.005394735308580621, 'init_value': -23.76520347595215, 'ave_value': -22.278179387850603, 'soft_opc': nan} step=14104




2022-04-20 16:21.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.30 [info     ] FQE_20220420162010: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016871025395947834, 'time_algorithm_update': 0.005012362502342047, 'loss': 0.6226832883779047, 'time_step': 0.005255581334579823, 'init_value': -23.806819915771484, 'ave_value': -22.369163565122868, 'soft_opc': nan} step=14448




2022-04-20 16:21.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.32 [info     ] FQE_20220420162010: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017319030539934024, 'time_algorithm_update': 0.0050666339175645695, 'loss': 0.6143566088718471, 'time_step': 0.005314641913702321, 'init_value': -24.078920364379883, 'ave_value': -22.579569466908772, 'soft_opc': nan} step=14792




2022-04-20 16:21.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.34 [info     ] FQE_20220420162010: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017026690549628678, 'time_algorithm_update': 0.005098295766253804, 'loss': 0.6378553669715603, 'time_step': 0.005343074022337448, 'init_value': -24.183448791503906, 'ave_value': -22.983880657977885, 'soft_opc': nan} step=15136




2022-04-20 16:21.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.36 [info     ] FQE_20220420162010: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016871926396392112, 'time_algorithm_update': 0.004569358603898869, 'loss': 0.6522180331535204, 'time_step': 0.004813633685888246, 'init_value': -24.800392150878906, 'ave_value': -23.592103346833238, 'soft_opc': nan} step=15480




2022-04-20 16:21.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.38 [info     ] FQE_20220420162010: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017117622286774392, 'time_algorithm_update': 0.005080663187559261, 'loss': 0.6590488519205517, 'time_step': 0.00532990624738294, 'init_value': -24.803058624267578, 'ave_value': -23.810810794392683, 'soft_opc': nan} step=15824




2022-04-20 16:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.40 [info     ] FQE_20220420162010: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017053928486136503, 'time_algorithm_update': 0.0050716261531031405, 'loss': 0.6607555907793603, 'time_step': 0.005317159170328185, 'init_value': -24.742080688476562, 'ave_value': -23.744955931832123, 'soft_opc': nan} step=16168




2022-04-20 16:21.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.42 [info     ] FQE_20220420162010: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017094196275223132, 'time_algorithm_update': 0.005046933196311773, 'loss': 0.6619980870421196, 'time_step': 0.0052938142488169115, 'init_value': -24.893625259399414, 'ave_value': -24.013834296583056, 'soft_opc': nan} step=16512




2022-04-20 16:21.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.44 [info     ] FQE_20220420162010: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001698898714642192, 'time_algorithm_update': 0.004778704670972602, 'loss': 0.6606559261116524, 'time_step': 0.00502322510231373, 'init_value': -24.780868530273438, 'ave_value': -24.023536290671373, 'soft_opc': nan} step=16856




2022-04-20 16:21.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:21.46 [info     ] FQE_20220420162010: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016888352327568587, 'time_algorithm_update': 0.005070152670838112, 'loss': 0.6557047835789448, 'time_step': 0.005315860343533893, 'init_value': -24.685592651367188, 'ave_value': -24.112207574498008, 'soft_opc': nan} step=17200




2022-04-20 16:21.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162010/model_17200.pt
search iteration:  11
using hyper params:  [0.0023345027592110473, 0.009716334839862815, 6.191457891724195e-05, 5]
2022-04-20 16:21.46 [debug    ] RoundIterator is selected.
2022-04-20 16:21.46 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420162146
2022-04-20 16:21.46 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:21.46 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:21.46 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:21.46 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0023345027592

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.50 [info     ] TD3PlusBC_20220420162146: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003773963939376742, 'time_algorithm_update': 0.008928329623930635, 'critic_loss': 5.493150718030874, 'actor_loss': 2.6462677905434058, 'time_step': 0.0093882787994474, 'td_error': 0.9103384812040632, 'init_value': -8.211771965026855, 'ave_value': -4.999274895765718} step=342
2022-04-20 16:21.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.53 [info     ] TD3PlusBC_20220420162146: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00037910227189984235, 'time_algorithm_update': 0.008655349413553873, 'critic_loss': 3.6148889486552678, 'actor_loss': 2.54118684021353, 'time_step': 0.009113976132800008, 'td_error': 1.0334635658662483, 'init_value': -11.411827087402344, 'ave_value': -7.037601730007435} step=684
2022-04-20 16:21.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.57 [info     ] TD3PlusBC_20220420162146: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00037977151703416255, 'time_algorithm_update': 0.008972090587281344, 'critic_loss': 5.667930700277028, 'actor_loss': 2.5331671447084663, 'time_step': 0.00943647351181298, 'td_error': 1.2469671417346522, 'init_value': -15.20434856414795, 'ave_value': -9.357899566975355} step=1026
2022-04-20 16:21.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.01 [info     ] TD3PlusBC_20220420162146: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00037424466763323513, 'time_algorithm_update': 0.008958451929148178, 'critic_loss': 7.95698338572742, 'actor_loss': 2.5270129658325375, 'time_step': 0.009415074398643091, 'td_error': 1.4914729870969647, 'init_value': -18.470664978027344, 'ave_value': -11.541628740393758} step=1368
2022-04-20 16:22.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.05 [info     ] TD3PlusBC_20220420162146: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003764824560511182, 'time_algorithm_update': 0.008568482789379812, 'critic_loss': 10.510598440616452, 'actor_loss': 2.5255682370816057, 'time_step': 0.00902200790873745, 'td_error': 1.7670407118519245, 'init_value': -22.17392349243164, 'ave_value': -13.957805216755332} step=1710
2022-04-20 16:22.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.08 [info     ] TD3PlusBC_20220420162146: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00037867283960532026, 'time_algorithm_update': 0.008964653600726211, 'critic_loss': 13.304129751105057, 'actor_loss': 2.5227567814944085, 'time_step': 0.009414895236143592, 'td_error': 2.0391747687764554, 'init_value': -25.425594329833984, 'ave_value': -16.0277901135659} step=2052
2022-04-20 16:22.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.12 [info     ] TD3PlusBC_20220420162146: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.000379460596898843, 'time_algorithm_update': 0.00890748333512691, 'critic_loss': 16.183629457016437, 'actor_loss': 2.521042289789657, 'time_step': 0.009359702032211929, 'td_error': 2.423219700364835, 'init_value': -29.49090003967285, 'ave_value': -18.491725845059364} step=2394
2022-04-20 16:22.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.16 [info     ] TD3PlusBC_20220420162146: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037692234530086404, 'time_algorithm_update': 0.008957500346222816, 'critic_loss': 19.439220924823605, 'actor_loss': 2.519713768484997, 'time_step': 0.009411000368887918, 'td_error': 2.6736385912532814, 'init_value': -33.989723205566406, 'ave_value': -20.617164598767065} step=2736
2022-04-20 16:22.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.19 [info     ] TD3PlusBC_20220420162146: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00037167713656062967, 'time_algorithm_update': 0.008944201887699595, 'critic_loss': 22.787745358651144, 'actor_loss': 2.51844576924865, 'time_step': 0.009386646817302146, 'td_error': 2.9389516503397863, 'init_value': -35.572914123535156, 'ave_value': -22.142993941652747} step=3078
2022-04-20 16:22.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.23 [info     ] TD3PlusBC_20220420162146: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003785055283217402, 'time_algorithm_update': 0.008490901244314094, 'critic_loss': 26.522118398320604, 'actor_loss': 2.5193777586284436, 'time_step': 0.00894235170375534, 'td_error': 3.38140432220071, 'init_value': -38.88426971435547, 'ave_value': -24.132381416257758} step=3420
2022-04-20 16:22.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.27 [info     ] TD3PlusBC_20220420162146: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003790555641665096, 'time_algorithm_update': 0.008939771624336465, 'critic_loss': 30.455145590486584, 'actor_loss': 2.518325430607935, 'time_step': 0.00939381262015181, 'td_error': 3.682064356577503, 'init_value': -40.84720230102539, 'ave_value': -25.869275532083183} step=3762
2022-04-20 16:22.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.30 [info     ] TD3PlusBC_20220420162146: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00037435481422825863, 'time_algorithm_update': 0.009008199847929658, 'critic_loss': 34.57013309768766, 'actor_loss': 2.517846874326293, 'time_step': 0.009457571464672423, 'td_error': 4.264054345051008, 'init_value': -45.39330291748047, 'ave_value': -28.063039148741126} step=4104
2022-04-20 16:22.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.34 [info     ] TD3PlusBC_20220420162146: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00037674736558345324, 'time_algorithm_update': 0.008769024882400245, 'critic_loss': 38.10114644145408, 'actor_loss': 2.516963914123892, 'time_step': 0.00922070678911711, 'td_error': 4.523569544729198, 'init_value': -46.43994140625, 'ave_value': -29.571527014645405} step=4446
2022-04-20 16:22.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.38 [info     ] TD3PlusBC_20220420162146: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00037961675409685104, 'time_algorithm_update': 0.00897326734330919, 'critic_loss': 42.90158988997253, 'actor_loss': 2.516493288397092, 'time_step': 0.009431184383860806, 'td_error': 5.241464019857719, 'init_value': -51.11384963989258, 'ave_value': -31.730999345086463} step=4788
2022-04-20 16:22.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.41 [info     ] TD3PlusBC_20220420162146: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003752185587297406, 'time_algorithm_update': 0.008491349499128019, 'critic_loss': 46.767915642052365, 'actor_loss': 2.516584623626798, 'time_step': 0.008942520409299616, 'td_error': 5.361717294093863, 'init_value': -51.61677169799805, 'ave_value': -32.83357545757457} step=5130
2022-04-20 16:22.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.45 [info     ] TD3PlusBC_20220420162146: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003751878849944176, 'time_algorithm_update': 0.008904750584161769, 'critic_loss': 51.1576691956548, 'actor_loss': 2.5165271996057523, 'time_step': 0.009357418233191061, 'td_error': 5.741610781844753, 'init_value': -54.610023498535156, 'ave_value': -34.58165182707328} step=5472
2022-04-20 16:22.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.49 [info     ] TD3PlusBC_20220420162146: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003767536397565875, 'time_algorithm_update': 0.008992679634986565, 'critic_loss': 55.18914412894444, 'actor_loss': 2.517339891857571, 'time_step': 0.009448692812557108, 'td_error': 6.309868914015758, 'init_value': -57.59252166748047, 'ave_value': -36.17582551544465} step=5814
2022-04-20 16:22.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.53 [info     ] TD3PlusBC_20220420162146: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003749362209386993, 'time_algorithm_update': 0.008715404404534234, 'critic_loss': 59.26278762371219, 'actor_loss': 2.5160520090694316, 'time_step': 0.009164586401822274, 'td_error': 6.580520360324264, 'init_value': -58.454322814941406, 'ave_value': -37.420483182047214} step=6156
2022-04-20 16:22.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.56 [info     ] TD3PlusBC_20220420162146: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00037881993410880107, 'time_algorithm_update': 0.008954920266803942, 'critic_loss': 63.3617783317789, 'actor_loss': 2.5177182914220797, 'time_step': 0.009414410730551558, 'td_error': 6.7460000191740495, 'init_value': -59.38222122192383, 'ave_value': -38.66510982026925} step=6498
2022-04-20 16:22.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.00 [info     ] TD3PlusBC_20220420162146: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00037526178081133215, 'time_algorithm_update': 0.00851496618393569, 'critic_loss': 67.43136260941712, 'actor_loss': 2.516770512039898, 'time_step': 0.008965796197366994, 'td_error': 7.59800371307613, 'init_value': -63.21891403198242, 'ave_value': -40.364427514125985} step=6840
2022-04-20 16:23.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.04 [info     ] TD3PlusBC_20220420162146: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00037428231267204064, 'time_algorithm_update': 0.008968811983253524, 'critic_loss': 70.97343499479238, 'actor_loss': 2.518998834821913, 'time_step': 0.00941944192027488, 'td_error': 7.577912408953075, 'init_value': -63.1234130859375, 'ave_value': -41.22594723567358} step=7182
2022-04-20 16:23.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.07 [info     ] TD3PlusBC_20220420162146: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037452561116357993, 'time_algorithm_update': 0.008937808505275794, 'critic_loss': 74.8695901681108, 'actor_loss': 2.5176692984954654, 'time_step': 0.009390626734460306, 'td_error': 8.163510553984077, 'init_value': -66.4218521118164, 'ave_value': -42.79238689468175} step=7524
2022-04-20 16:23.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.11 [info     ] TD3PlusBC_20220420162146: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003788819787097953, 'time_algorithm_update': 0.008643071553860491, 'critic_loss': 78.5632171965482, 'actor_loss': 2.517393820467051, 'time_step': 0.009100338868927537, 'td_error': 8.531031468838522, 'init_value': -67.12452697753906, 'ave_value': -43.7040792605791} step=7866
2022-04-20 16:23.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.15 [info     ] TD3PlusBC_20220420162146: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00037458417011283296, 'time_algorithm_update': 0.009026270163686652, 'critic_loss': 82.47908027827391, 'actor_loss': 2.5185893237242225, 'time_step': 0.009480218441165679, 'td_error': 8.632260523913093, 'init_value': -67.67105865478516, 'ave_value': -44.82664043612921} step=8208
2022-04-20 16:23.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.18 [info     ] TD3PlusBC_20220420162146: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003735837880630939, 'time_algorithm_update': 0.008553039957905373, 'critic_loss': 86.04097262600013, 'actor_loss': 2.5194810273354515, 'time_step': 0.008997794480351677, 'td_error': 9.136936179204607, 'init_value': -70.39099884033203, 'ave_value': -45.77309906722029} step=8550
2022-04-20 16:23.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.22 [info     ] TD3PlusBC_20220420162146: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00038088831985205935, 'time_algorithm_update': 0.008885150764420716, 'critic_loss': 89.31810245179294, 'actor_loss': 2.52005476282354, 'time_step': 0.009344176939356397, 'td_error': 9.829302030160711, 'init_value': -72.73274230957031, 'ave_value': -47.07952476927138} step=8892
2022-04-20 16:23.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.26 [info     ] TD3PlusBC_20220420162146: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003742209652013946, 'time_algorithm_update': 0.008927951779281884, 'critic_loss': 92.39110108983446, 'actor_loss': 2.520608126768592, 'time_step': 0.009382287661234537, 'td_error': 9.740352269673782, 'init_value': -72.76164245605469, 'ave_value': -47.86818833160441} step=9234
2022-04-20 16:23.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.29 [info     ] TD3PlusBC_20220420162146: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003765340436968887, 'time_algorithm_update': 0.008477721995080423, 'critic_loss': 95.80919530098899, 'actor_loss': 2.5199152042991235, 'time_step': 0.008931495989972388, 'td_error': 10.059270686218195, 'init_value': -74.45004272460938, 'ave_value': -48.8945390649327} step=9576
2022-04-20 16:23.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.33 [info     ] TD3PlusBC_20220420162146: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000380508383812263, 'time_algorithm_update': 0.00893935543751856, 'critic_loss': 98.60494604724192, 'actor_loss': 2.522069491838154, 'time_step': 0.009396967134977641, 'td_error': 10.991806202584478, 'init_value': -76.2632064819336, 'ave_value': -50.071208714353475} step=9918
2022-04-20 16:23.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.37 [info     ] TD3PlusBC_20220420162146: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003714477806760554, 'time_algorithm_update': 0.008464606184708444, 'critic_loss': 101.94884837858858, 'actor_loss': 2.519235874477186, 'time_step': 0.008893845374124092, 'td_error': 10.699939713577974, 'init_value': -77.29044342041016, 'ave_value': -50.97140369753781} step=10260
2022-04-20 16:23.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.41 [info     ] TD3PlusBC_20220420162146: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037851110536452624, 'time_algorithm_update': 0.009037102175037764, 'critic_loss': 104.29632325200309, 'actor_loss': 2.5202132665623003, 'time_step': 0.00947822604263038, 'td_error': 10.925293078029512, 'init_value': -76.07415771484375, 'ave_value': -51.33607670780867} step=10602
2022-04-20 16:23.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.44 [info     ] TD3PlusBC_20220420162146: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00037744310167100694, 'time_algorithm_update': 0.008900484843560827, 'critic_loss': 107.71183272690801, 'actor_loss': 2.522231415698403, 'time_step': 0.009343826282791227, 'td_error': 11.149538806117198, 'init_value': -77.80549621582031, 'ave_value': -52.307939196437225} step=10944
2022-04-20 16:23.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.48 [info     ] TD3PlusBC_20220420162146: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.000375838307609335, 'time_algorithm_update': 0.008509293634291978, 'critic_loss': 109.76308530394795, 'actor_loss': 2.5221714234491537, 'time_step': 0.00895146250027662, 'td_error': 11.337509278241258, 'init_value': -79.2616195678711, 'ave_value': -53.32255272537311} step=11286
2022-04-20 16:23.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.52 [info     ] TD3PlusBC_20220420162146: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00037980079650878906, 'time_algorithm_update': 0.008949519597996048, 'critic_loss': 112.9004061626412, 'actor_loss': 2.522526134524429, 'time_step': 0.009396239330894068, 'td_error': 11.95080010709099, 'init_value': -78.63883209228516, 'ave_value': -53.35306351517587} step=11628
2022-04-20 16:23.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.55 [info     ] TD3PlusBC_20220420162146: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00038117205190379717, 'time_algorithm_update': 0.00860807282185694, 'critic_loss': 115.1432314532542, 'actor_loss': 2.523679296872769, 'time_step': 0.00905957347468326, 'td_error': 11.874918104708371, 'init_value': -79.88258361816406, 'ave_value': -54.42055242502047} step=11970
2022-04-20 16:23.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.59 [info     ] TD3PlusBC_20220420162146: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003788317853247213, 'time_algorithm_update': 0.008978593419169822, 'critic_loss': 117.97514198258607, 'actor_loss': 2.5232758828771042, 'time_step': 0.009428153958236962, 'td_error': 12.467241804955012, 'init_value': -80.224365234375, 'ave_value': -55.016969365346256} step=12312
2022-04-20 16:23.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.03 [info     ] TD3PlusBC_20220420162146: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00037380198986209626, 'time_algorithm_update': 0.008984390755145871, 'critic_loss': 120.47222358302066, 'actor_loss': 2.523968916887428, 'time_step': 0.00942330611379523, 'td_error': 12.452295015657809, 'init_value': -80.58489990234375, 'ave_value': -55.50590429259112} step=12654
2022-04-20 16:24.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.06 [info     ] TD3PlusBC_20220420162146: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003784929799754717, 'time_algorithm_update': 0.008613840181228013, 'critic_loss': 122.58703972442805, 'actor_loss': 2.5246814315082036, 'time_step': 0.009052813401696279, 'td_error': 12.809247077899553, 'init_value': -81.92547607421875, 'ave_value': -55.92584934424408} step=12996
2022-04-20 16:24.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.10 [info     ] TD3PlusBC_20220420162146: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003765563518680327, 'time_algorithm_update': 0.008957671143158137, 'critic_loss': 124.72727651763381, 'actor_loss': 2.5255624146489373, 'time_step': 0.009399260693823385, 'td_error': 12.824976479813644, 'init_value': -81.91967010498047, 'ave_value': -56.66348744815224} step=13338
2022-04-20 16:24.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.14 [info     ] TD3PlusBC_20220420162146: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037739221115558466, 'time_algorithm_update': 0.008597554519162541, 'critic_loss': 126.60419805426346, 'actor_loss': 2.5259432011877583, 'time_step': 0.009037615262974075, 'td_error': 12.44482138094716, 'init_value': -81.44715881347656, 'ave_value': -56.97611628112023} step=13680
2022-04-20 16:24.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.17 [info     ] TD3PlusBC_20220420162146: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003774228848909077, 'time_algorithm_update': 0.008989662454839339, 'critic_loss': 129.1547094646253, 'actor_loss': 2.524413759945429, 'time_step': 0.009432730618973224, 'td_error': 13.052627292698004, 'init_value': -82.08927917480469, 'ave_value': -57.93695228991124} step=14022
2022-04-20 16:24.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.21 [info     ] TD3PlusBC_20220420162146: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003704885293168631, 'time_algorithm_update': 0.008814757330375806, 'critic_loss': 130.67603650009423, 'actor_loss': 2.5264705747191667, 'time_step': 0.009250886956153557, 'td_error': 13.106449926335706, 'init_value': -82.9498291015625, 'ave_value': -58.30094815722188} step=14364
2022-04-20 16:24.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.25 [info     ] TD3PlusBC_20220420162146: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00038167328862418907, 'time_algorithm_update': 0.008501745106881125, 'critic_loss': 132.41435504935637, 'actor_loss': 2.527289503499081, 'time_step': 0.008947375922175179, 'td_error': 12.848840265406864, 'init_value': -79.82954406738281, 'ave_value': -58.20865044533317} step=14706
2022-04-20 16:24.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.28 [info     ] TD3PlusBC_20220420162146: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00038259001503213806, 'time_algorithm_update': 0.008832647089372602, 'critic_loss': 134.28623232925148, 'actor_loss': 2.526097089923613, 'time_step': 0.009277557071886565, 'td_error': 13.179961247874118, 'init_value': -81.22413635253906, 'ave_value': -58.7666164959853} step=15048
2022-04-20 16:24.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.32 [info     ] TD3PlusBC_20220420162146: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037651661543818243, 'time_algorithm_update': 0.008879189602812828, 'critic_loss': 135.99996361537288, 'actor_loss': 2.527263213319388, 'time_step': 0.009321902230469107, 'td_error': 13.530512655042198, 'init_value': -83.40724182128906, 'ave_value': -59.883577452651686} step=15390
2022-04-20 16:24.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.36 [info     ] TD3PlusBC_20220420162146: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037764038955956174, 'time_algorithm_update': 0.008981189532586706, 'critic_loss': 137.31820866099577, 'actor_loss': 2.5273512943446286, 'time_step': 0.00942523855912058, 'td_error': 13.085276672439052, 'init_value': -79.70809173583984, 'ave_value': -59.387580655007326} step=15732
2022-04-20 16:24.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.39 [info     ] TD3PlusBC_20220420162146: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003804310023436072, 'time_algorithm_update': 0.008884813353332162, 'critic_loss': 138.7825517264026, 'actor_loss': 2.5285541314130637, 'time_step': 0.009329187242608322, 'td_error': 13.745712952033207, 'init_value': -83.6044692993164, 'ave_value': -60.58673080020274} step=16074
2022-04-20 16:24.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.43 [info     ] TD3PlusBC_20220420162146: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00037572188684117724, 'time_algorithm_update': 0.008548878786856668, 'critic_loss': 139.97036680701183, 'actor_loss': 2.529801955697132, 'time_step': 0.008987675633346825, 'td_error': 14.131261713852034, 'init_value': -83.39372253417969, 'ave_value': -60.901776468320705} step=16416
2022-04-20 16:24.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.47 [info     ] TD3PlusBC_20220420162146: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003763451213725129, 'time_algorithm_update': 0.008905083812468233, 'critic_loss': 141.0044257626896, 'actor_loss': 2.528263223101521, 'time_step': 0.009345995055304633, 'td_error': 14.022662238835379, 'init_value': -83.76045989990234, 'ave_value': -61.29138914191072} step=16758
2022-04-20 16:24.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.50 [info     ] TD3PlusBC_20220420162146: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037721932282921863, 'time_algorithm_update': 0.008939507411934479, 'critic_loss': 142.6766999004877, 'actor_loss': 2.529215056993808, 'time_step': 0.009381713922957929, 'td_error': 14.056157324600425, 'init_value': -83.41905212402344, 'ave_value': -61.48563563823197} step=17100
2022-04-20 16:24.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162146/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:24.52 [info     ] FQE_20220420162451: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016660609487759864, 'time_algorithm_update': 0.005062664969492767, 'loss': 0.00764467396989128, 'time_step': 0.005304286708939547, 'init_value': -0.067902572453022, 'ave_value': -0.05551250036526192, 'soft_opc': nan} step=177




2022-04-20 16:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.53 [info     ] FQE_20220420162451: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00016602419190487618, 'time_algorithm_update': 0.005088415523033358, 'loss': 0.005562348225506517, 'time_step': 0.005328590587034064, 'init_value': -0.2520209848880768, 'ave_value': -0.18084501028284655, 'soft_opc': nan} step=354




2022-04-20 16:24.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.54 [info     ] FQE_20220420162451: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.0001687101051632294, 'time_algorithm_update': 0.00509748351102495, 'loss': 0.004860450415003856, 'time_step': 0.005337337989591609, 'init_value': -0.36801788210868835, 'ave_value': -0.2339611545753282, 'soft_opc': nan} step=531




2022-04-20 16:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.55 [info     ] FQE_20220420162451: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.0001687074111679853, 'time_algorithm_update': 0.005125210110077077, 'loss': 0.004453589532019223, 'time_step': 0.005366693108768786, 'init_value': -0.4706164598464966, 'ave_value': -0.31798633622850203, 'soft_opc': nan} step=708




2022-04-20 16:24.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.56 [info     ] FQE_20220420162451: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016460984440173134, 'time_algorithm_update': 0.005083446448805642, 'loss': 0.004128942696251354, 'time_step': 0.005316312703709144, 'init_value': -0.5481061339378357, 'ave_value': -0.355898524241315, 'soft_opc': nan} step=885




2022-04-20 16:24.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.57 [info     ] FQE_20220420162451: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016835315079338808, 'time_algorithm_update': 0.005027535271509892, 'loss': 0.003865072786502835, 'time_step': 0.0052713162481448075, 'init_value': -0.5997993350028992, 'ave_value': -0.4032417747317939, 'soft_opc': nan} step=1062




2022-04-20 16:24.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.58 [info     ] FQE_20220420162451: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00016924755721442442, 'time_algorithm_update': 0.005025339665385963, 'loss': 0.0036817302474867826, 'time_step': 0.005264226999659997, 'init_value': -0.68123859167099, 'ave_value': -0.46988358833455107, 'soft_opc': nan} step=1239




2022-04-20 16:24.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:24.59 [info     ] FQE_20220420162451: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.0001714781852765272, 'time_algorithm_update': 0.005040214560126181, 'loss': 0.0035902048430884372, 'time_step': 0.0052833476309048924, 'init_value': -0.7883493900299072, 'ave_value': -0.5738525604812411, 'soft_opc': nan} step=1416




2022-04-20 16:24.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.00 [info     ] FQE_20220420162451: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016262032891397422, 'time_algorithm_update': 0.004114823152789962, 'loss': 0.0034402217591013025, 'time_step': 0.004352689462866487, 'init_value': -0.8358862400054932, 'ave_value': -0.5861793467232415, 'soft_opc': nan} step=1593




2022-04-20 16:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.01 [info     ] FQE_20220420162451: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001680581583141607, 'time_algorithm_update': 0.004984689971147958, 'loss': 0.00351999849107348, 'time_step': 0.005225446938121386, 'init_value': -0.9698365330696106, 'ave_value': -0.6857496825278969, 'soft_opc': nan} step=1770




2022-04-20 16:25.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.02 [info     ] FQE_20220420162451: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00017108620896851275, 'time_algorithm_update': 0.005004070572933908, 'loss': 0.0037334520497364597, 'time_step': 0.0052503879460911295, 'init_value': -1.0866395235061646, 'ave_value': -0.7897231230059185, 'soft_opc': nan} step=1947




2022-04-20 16:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.03 [info     ] FQE_20220420162451: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016688761738060557, 'time_algorithm_update': 0.005178133646647136, 'loss': 0.004005167778649783, 'time_step': 0.005418054128097275, 'init_value': -1.1654354333877563, 'ave_value': -0.8577422592688251, 'soft_opc': nan} step=2124




2022-04-20 16:25.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.04 [info     ] FQE_20220420162451: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00017181089368917173, 'time_algorithm_update': 0.005052780700942217, 'loss': 0.004123767016397653, 'time_step': 0.0052978534482966706, 'init_value': -1.2664523124694824, 'ave_value': -0.9219068853268484, 'soft_opc': nan} step=2301




2022-04-20 16:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.05 [info     ] FQE_20220420162451: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016709236101915606, 'time_algorithm_update': 0.005156393105027366, 'loss': 0.004825985291315043, 'time_step': 0.0053979313306215795, 'init_value': -1.4208687543869019, 'ave_value': -1.0562156352217311, 'soft_opc': nan} step=2478




2022-04-20 16:25.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.06 [info     ] FQE_20220420162451: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016760017912266617, 'time_algorithm_update': 0.005065031644314696, 'loss': 0.005237708119465049, 'time_step': 0.005305100295503261, 'init_value': -1.5582176446914673, 'ave_value': -1.1570509895190104, 'soft_opc': nan} step=2655




2022-04-20 16:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.07 [info     ] FQE_20220420162451: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00017265007320770435, 'time_algorithm_update': 0.00511982481358415, 'loss': 0.006166785829805764, 'time_step': 0.005364485379666258, 'init_value': -1.6908596754074097, 'ave_value': -1.2658592060499183, 'soft_opc': nan} step=2832




2022-04-20 16:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.08 [info     ] FQE_20220420162451: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001667623466017556, 'time_algorithm_update': 0.005145381399467167, 'loss': 0.006466599345722862, 'time_step': 0.0053856952042229435, 'init_value': -1.8209362030029297, 'ave_value': -1.3895552427684132, 'soft_opc': nan} step=3009




2022-04-20 16:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.09 [info     ] FQE_20220420162451: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.0001665010290630793, 'time_algorithm_update': 0.004181675991769564, 'loss': 0.0072586764811701275, 'time_step': 0.004421795828867767, 'init_value': -1.9890657663345337, 'ave_value': -1.5422294229045286, 'soft_opc': nan} step=3186




2022-04-20 16:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.10 [info     ] FQE_20220420162451: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00016965300349865929, 'time_algorithm_update': 0.005067920954213978, 'loss': 0.008089065881272367, 'time_step': 0.005308244187953108, 'init_value': -2.0741732120513916, 'ave_value': -1.6095189411786046, 'soft_opc': nan} step=3363




2022-04-20 16:25.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.11 [info     ] FQE_20220420162451: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001664309851867331, 'time_algorithm_update': 0.005067044058762028, 'loss': 0.008625265298342849, 'time_step': 0.005308582284356241, 'init_value': -2.174119710922241, 'ave_value': -1.6982063833814305, 'soft_opc': nan} step=3540




2022-04-20 16:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.12 [info     ] FQE_20220420162451: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016567262552552304, 'time_algorithm_update': 0.005077805222764527, 'loss': 0.009376192507518213, 'time_step': 0.005318544678768869, 'init_value': -2.3337485790252686, 'ave_value': -1.8333527611081932, 'soft_opc': nan} step=3717




2022-04-20 16:25.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.13 [info     ] FQE_20220420162451: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016830465887899453, 'time_algorithm_update': 0.005036313655012745, 'loss': 0.01017394805613821, 'time_step': 0.005279126140357411, 'init_value': -2.3550074100494385, 'ave_value': -1.8509258561999768, 'soft_opc': nan} step=3894




2022-04-20 16:25.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.14 [info     ] FQE_20220420162451: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00017133136253572452, 'time_algorithm_update': 0.005086034031237586, 'loss': 0.011332730729117106, 'time_step': 0.005331793747379281, 'init_value': -2.4770419597625732, 'ave_value': -1.9432538062121507, 'soft_opc': nan} step=4071




2022-04-20 16:25.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.15 [info     ] FQE_20220420162451: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00017665873813090351, 'time_algorithm_update': 0.0050866671201199466, 'loss': 0.012403975884130194, 'time_step': 0.005338994796666722, 'init_value': -2.6166141033172607, 'ave_value': -2.054799160695783, 'soft_opc': nan} step=4248




2022-04-20 16:25.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.16 [info     ] FQE_20220420162451: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016821171604307358, 'time_algorithm_update': 0.005102001341049281, 'loss': 0.012857100327820175, 'time_step': 0.005345344543457031, 'init_value': -2.7802813053131104, 'ave_value': -2.1746602130421735, 'soft_opc': nan} step=4425




2022-04-20 16:25.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.17 [info     ] FQE_20220420162451: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00017003420382569737, 'time_algorithm_update': 0.0050432277938066905, 'loss': 0.013509842403375117, 'time_step': 0.005287985343717586, 'init_value': -2.8853542804718018, 'ave_value': -2.2763669150526282, 'soft_opc': nan} step=4602




2022-04-20 16:25.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.18 [info     ] FQE_20220420162451: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00016555543673240532, 'time_algorithm_update': 0.004216301912641795, 'loss': 0.014320663991384208, 'time_step': 0.004454696245786161, 'init_value': -2.9394290447235107, 'ave_value': -2.3249294160599554, 'soft_opc': nan} step=4779




2022-04-20 16:25.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.19 [info     ] FQE_20220420162451: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00017500193105579096, 'time_algorithm_update': 0.005163639952233956, 'loss': 0.015421277215956883, 'time_step': 0.005412896474202474, 'init_value': -3.043837785720825, 'ave_value': -2.4107281302580157, 'soft_opc': nan} step=4956




2022-04-20 16:25.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.20 [info     ] FQE_20220420162451: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016785072068036612, 'time_algorithm_update': 0.0050104364836956825, 'loss': 0.015613712697279361, 'time_step': 0.005252408442524194, 'init_value': -3.0747897624969482, 'ave_value': -2.4170973930295343, 'soft_opc': nan} step=5133




2022-04-20 16:25.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.21 [info     ] FQE_20220420162451: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00017262986824337373, 'time_algorithm_update': 0.005067620573744262, 'loss': 0.016510606594094323, 'time_step': 0.005317957387805658, 'init_value': -3.1777093410491943, 'ave_value': -2.504170433229378, 'soft_opc': nan} step=5310




2022-04-20 16:25.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.22 [info     ] FQE_20220420162451: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00017141352939066914, 'time_algorithm_update': 0.005228937008960099, 'loss': 0.0175048807868734, 'time_step': 0.0054793761948407705, 'init_value': -3.327700614929199, 'ave_value': -2.619676353866318, 'soft_opc': nan} step=5487




2022-04-20 16:25.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.23 [info     ] FQE_20220420162451: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.0001678493736827441, 'time_algorithm_update': 0.005089086327849135, 'loss': 0.01811549627145826, 'time_step': 0.005335403701006356, 'init_value': -3.3978917598724365, 'ave_value': -2.715804369469081, 'soft_opc': nan} step=5664




2022-04-20 16:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.24 [info     ] FQE_20220420162451: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00017390008700096, 'time_algorithm_update': 0.0050336169657734155, 'loss': 0.0191207927866118, 'time_step': 0.005282111087087858, 'init_value': -3.4468491077423096, 'ave_value': -2.728974091147517, 'soft_opc': nan} step=5841




2022-04-20 16:25.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.25 [info     ] FQE_20220420162451: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00017627484380862134, 'time_algorithm_update': 0.00496457256166275, 'loss': 0.020236369680171295, 'time_step': 0.005213648585949914, 'init_value': -3.5578854084014893, 'ave_value': -2.8140046620310786, 'soft_opc': nan} step=6018




2022-04-20 16:25.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.26 [info     ] FQE_20220420162451: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00017241704261909097, 'time_algorithm_update': 0.0049863615951969125, 'loss': 0.021217874642645698, 'time_step': 0.005234261690560034, 'init_value': -3.6428451538085938, 'ave_value': -2.879726498520321, 'soft_opc': nan} step=6195




2022-04-20 16:25.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.27 [info     ] FQE_20220420162451: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016263110489495058, 'time_algorithm_update': 0.004373066842892749, 'loss': 0.022398358485187893, 'time_step': 0.004608408879425566, 'init_value': -3.8135673999786377, 'ave_value': -3.029483406993004, 'soft_opc': nan} step=6372




2022-04-20 16:25.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.28 [info     ] FQE_20220420162451: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016909130549026748, 'time_algorithm_update': 0.005115879457549187, 'loss': 0.023275909782255775, 'time_step': 0.005357211592507228, 'init_value': -3.906965970993042, 'ave_value': -3.0735001127585186, 'soft_opc': nan} step=6549




2022-04-20 16:25.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.29 [info     ] FQE_20220420162451: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.0001731996482374978, 'time_algorithm_update': 0.005074166982187389, 'loss': 0.024201867117357077, 'time_step': 0.005323342684298586, 'init_value': -4.089786052703857, 'ave_value': -3.236341945160259, 'soft_opc': nan} step=6726




2022-04-20 16:25.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.30 [info     ] FQE_20220420162451: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00017136099648340947, 'time_algorithm_update': 0.005141312119650975, 'loss': 0.02457912559989356, 'time_step': 0.005388680150953389, 'init_value': -4.091032981872559, 'ave_value': -3.245136343063535, 'soft_opc': nan} step=6903




2022-04-20 16:25.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.31 [info     ] FQE_20220420162451: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00017085317837989936, 'time_algorithm_update': 0.005124135205974686, 'loss': 0.024802687355596046, 'time_step': 0.005368929124821377, 'init_value': -4.118875026702881, 'ave_value': -3.2321073340380693, 'soft_opc': nan} step=7080




2022-04-20 16:25.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.32 [info     ] FQE_20220420162451: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016889464383744923, 'time_algorithm_update': 0.004961240089545816, 'loss': 0.025680904292454152, 'time_step': 0.005206156585176112, 'init_value': -4.2261481285095215, 'ave_value': -3.3224150825280683, 'soft_opc': nan} step=7257




2022-04-20 16:25.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.33 [info     ] FQE_20220420162451: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016776047183968926, 'time_algorithm_update': 0.005115021420063945, 'loss': 0.02608639078930357, 'time_step': 0.005360929305944066, 'init_value': -4.355766773223877, 'ave_value': -3.4103856306511897, 'soft_opc': nan} step=7434




2022-04-20 16:25.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.34 [info     ] FQE_20220420162451: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016504896562651725, 'time_algorithm_update': 0.005094961931476486, 'loss': 0.026739488733343066, 'time_step': 0.00533202408397265, 'init_value': -4.438986301422119, 'ave_value': -3.5201270270164127, 'soft_opc': nan} step=7611




2022-04-20 16:25.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.35 [info     ] FQE_20220420162451: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016904550757111802, 'time_algorithm_update': 0.004708412676881262, 'loss': 0.027834523583651852, 'time_step': 0.004948842323432534, 'init_value': -4.418521881103516, 'ave_value': -3.4931897090977286, 'soft_opc': nan} step=7788




2022-04-20 16:25.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.36 [info     ] FQE_20220420162451: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016754629921778447, 'time_algorithm_update': 0.00469857959424035, 'loss': 0.02896654331598873, 'time_step': 0.004940846545548089, 'init_value': -4.444385051727295, 'ave_value': -3.509133308157101, 'soft_opc': nan} step=7965




2022-04-20 16:25.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.37 [info     ] FQE_20220420162451: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00017147145028841698, 'time_algorithm_update': 0.005146298704847778, 'loss': 0.030032764673022565, 'time_step': 0.005392718449824274, 'init_value': -4.479239463806152, 'ave_value': -3.5392424286105433, 'soft_opc': nan} step=8142




2022-04-20 16:25.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.38 [info     ] FQE_20220420162451: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00017474869550284693, 'time_algorithm_update': 0.005041557516755358, 'loss': 0.03087519886140517, 'time_step': 0.005288425811939994, 'init_value': -4.548138618469238, 'ave_value': -3.577943218481523, 'soft_opc': nan} step=8319




2022-04-20 16:25.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.39 [info     ] FQE_20220420162451: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016836796776723053, 'time_algorithm_update': 0.005053809807125458, 'loss': 0.03112063580784568, 'time_step': 0.005298455556233724, 'init_value': -4.605597972869873, 'ave_value': -3.604218908218113, 'soft_opc': nan} step=8496




2022-04-20 16:25.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.40 [info     ] FQE_20220420162451: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.0001722001760019421, 'time_algorithm_update': 0.004984920307741327, 'loss': 0.03305102734416092, 'time_step': 0.005234303447486317, 'init_value': -4.67437744140625, 'ave_value': -3.6426599857416955, 'soft_opc': nan} step=8673




2022-04-20 16:25.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:25.41 [info     ] FQE_20220420162451: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00017015408661405918, 'time_algorithm_update': 0.00510608274384407, 'loss': 0.03399044635579369, 'time_step': 0.005350525096311408, 'init_value': -4.754748344421387, 'ave_value': -3.745301908207638, 'soft_opc': nan} step=8850




2022-04-20 16:25.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162451/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:25.41 [info     ] Directory is created at d3rlpy_logs/FQE_20220420162541
2022-04-20 16:25.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:25.41 [debug    ] Building models...
2022-04-20 16:25.41 [debug    ] Models have been built.
2022-04-20 16:25.41 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420162541/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:25.43 [info     ] FQE_20220420162541: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001682604468146036, 'time_algorithm_update': 0.005037635564804077, 'loss': 0.03179548678074985, 'time_step': 0.005281288263409636, 'init_value': -1.3593670129776, 'ave_value': -1.350597141723375, 'soft_opc': nan} step=344




2022-04-20 16:25.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.45 [info     ] FQE_20220420162541: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001692362996034844, 'time_algorithm_update': 0.004577245823172636, 'loss': 0.026189648159129848, 'time_step': 0.004822561907213788, 'init_value': -2.1054444313049316, 'ave_value': -2.11136399130295, 'soft_opc': nan} step=688




2022-04-20 16:25.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.47 [info     ] FQE_20220420162541: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.000169535708981891, 'time_algorithm_update': 0.005060967317847318, 'loss': 0.030935963474993788, 'time_step': 0.005304935366608376, 'init_value': -2.969271659851074, 'ave_value': -3.0290580643861142, 'soft_opc': nan} step=1032




2022-04-20 16:25.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.49 [info     ] FQE_20220420162541: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017075691112252168, 'time_algorithm_update': 0.005099958458612132, 'loss': 0.035280099341667495, 'time_step': 0.0053467334702957505, 'init_value': -3.5203638076782227, 'ave_value': -3.636512992135039, 'soft_opc': nan} step=1376




2022-04-20 16:25.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.51 [info     ] FQE_20220420162541: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017083245654438817, 'time_algorithm_update': 0.005055018635683282, 'loss': 0.0429938069198194, 'time_step': 0.005301762458889983, 'init_value': -4.2172698974609375, 'ave_value': -4.37925986873137, 'soft_opc': nan} step=1720




2022-04-20 16:25.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.53 [info     ] FQE_20220420162541: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001690387725830078, 'time_algorithm_update': 0.0046671968559886135, 'loss': 0.051005081116590044, 'time_step': 0.004913109679554784, 'init_value': -4.656973838806152, 'ave_value': -4.842696841128237, 'soft_opc': nan} step=2064




2022-04-20 16:25.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.55 [info     ] FQE_20220420162541: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017106255819631177, 'time_algorithm_update': 0.0051228667414465615, 'loss': 0.060148386520246955, 'time_step': 0.0053690963013227596, 'init_value': -5.3826775550842285, 'ave_value': -5.557005771350216, 'soft_opc': nan} step=2408




2022-04-20 16:25.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.57 [info     ] FQE_20220420162541: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001735063486321028, 'time_algorithm_update': 0.0051134311875631645, 'loss': 0.07413162816814038, 'time_step': 0.005364894866943359, 'init_value': -5.949185371398926, 'ave_value': -6.124169536136292, 'soft_opc': nan} step=2752




2022-04-20 16:25.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:25.59 [info     ] FQE_20220420162541: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017239395962205043, 'time_algorithm_update': 0.005147341378899508, 'loss': 0.0869805920966576, 'time_step': 0.005397612965384195, 'init_value': -6.4009270668029785, 'ave_value': -6.5182338052370525, 'soft_opc': nan} step=3096




2022-04-20 16:25.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.01 [info     ] FQE_20220420162541: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001700097738310348, 'time_algorithm_update': 0.005109449458676715, 'loss': 0.10304172384704269, 'time_step': 0.005354400984076566, 'init_value': -7.079520225524902, 'ave_value': -7.128234644704037, 'soft_opc': nan} step=3440




2022-04-20 16:26.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.02 [info     ] FQE_20220420162541: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016907134721445483, 'time_algorithm_update': 0.004600124303684678, 'loss': 0.12125340413821992, 'time_step': 0.004845048799071201, 'init_value': -7.52389669418335, 'ave_value': -7.403814212544947, 'soft_opc': nan} step=3784




2022-04-20 16:26.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.04 [info     ] FQE_20220420162541: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017186791397804437, 'time_algorithm_update': 0.005102457695229109, 'loss': 0.1431007168992141, 'time_step': 0.0053502549958783525, 'init_value': -8.09786605834961, 'ave_value': -7.968139432886246, 'soft_opc': nan} step=4128




2022-04-20 16:26.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.06 [info     ] FQE_20220420162541: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017211118409799975, 'time_algorithm_update': 0.005055938349213711, 'loss': 0.16166416588131077, 'time_step': 0.005306237658788991, 'init_value': -8.472831726074219, 'ave_value': -8.207776336798126, 'soft_opc': nan} step=4472




2022-04-20 16:26.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.08 [info     ] FQE_20220420162541: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017074928727260855, 'time_algorithm_update': 0.00508697088374648, 'loss': 0.18181820880881575, 'time_step': 0.005333926095518955, 'init_value': -8.877485275268555, 'ave_value': -8.475787401524885, 'soft_opc': nan} step=4816




2022-04-20 16:26.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.10 [info     ] FQE_20220420162541: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001672048901402673, 'time_algorithm_update': 0.004709588233814683, 'loss': 0.1991196030339356, 'time_step': 0.004952466765115428, 'init_value': -9.365680694580078, 'ave_value': -9.043143134005367, 'soft_opc': nan} step=5160




2022-04-20 16:26.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.12 [info     ] FQE_20220420162541: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017217148182003996, 'time_algorithm_update': 0.0050789755444194, 'loss': 0.22021083737355332, 'time_step': 0.0053288035614545955, 'init_value': -10.078873634338379, 'ave_value': -9.867286241045306, 'soft_opc': nan} step=5504




2022-04-20 16:26.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.14 [info     ] FQE_20220420162541: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016804074132165244, 'time_algorithm_update': 0.005109162524689076, 'loss': 0.24709462007334412, 'time_step': 0.005351588476535885, 'init_value': -10.276040077209473, 'ave_value': -10.220185219211219, 'soft_opc': nan} step=5848




2022-04-20 16:26.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.16 [info     ] FQE_20220420162541: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017104869665101517, 'time_algorithm_update': 0.005120401465615561, 'loss': 0.2637894231891043, 'time_step': 0.0053665866685468095, 'init_value': -10.522768020629883, 'ave_value': -10.53227894695996, 'soft_opc': nan} step=6192




2022-04-20 16:26.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.18 [info     ] FQE_20220420162541: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017319792924925338, 'time_algorithm_update': 0.00512768501459166, 'loss': 0.2808881428330963, 'time_step': 0.005380279102990794, 'init_value': -10.725137710571289, 'ave_value': -10.861046553092699, 'soft_opc': nan} step=6536




2022-04-20 16:26.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.20 [info     ] FQE_20220420162541: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016913649647734886, 'time_algorithm_update': 0.0047215528266374455, 'loss': 0.2924278891905276, 'time_step': 0.004966433658156284, 'init_value': -10.978858947753906, 'ave_value': -11.282508493015577, 'soft_opc': nan} step=6880




2022-04-20 16:26.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.22 [info     ] FQE_20220420162541: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001726531705190969, 'time_algorithm_update': 0.005031247471654138, 'loss': 0.30859285839941614, 'time_step': 0.005277630894683128, 'init_value': -11.306999206542969, 'ave_value': -11.644088492722057, 'soft_opc': nan} step=7224




2022-04-20 16:26.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.24 [info     ] FQE_20220420162541: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017168355542559956, 'time_algorithm_update': 0.0051758608152700026, 'loss': 0.31712331401920596, 'time_step': 0.005423696235168812, 'init_value': -11.298260688781738, 'ave_value': -11.724734787700848, 'soft_opc': nan} step=7568




2022-04-20 16:26.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.26 [info     ] FQE_20220420162541: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017126285752584768, 'time_algorithm_update': 0.005080253578895746, 'loss': 0.3227692760505475, 'time_step': 0.005326745815055315, 'init_value': -11.45797348022461, 'ave_value': -12.010632118649008, 'soft_opc': nan} step=7912




2022-04-20 16:26.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.28 [info     ] FQE_20220420162541: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.000170943348906761, 'time_algorithm_update': 0.004769123570863591, 'loss': 0.3340243563315896, 'time_step': 0.005015985217205313, 'init_value': -11.725149154663086, 'ave_value': -12.39344699500372, 'soft_opc': nan} step=8256




2022-04-20 16:26.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.30 [info     ] FQE_20220420162541: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001668992430664772, 'time_algorithm_update': 0.005023353321607723, 'loss': 0.34451306103403834, 'time_step': 0.005265195009320281, 'init_value': -11.652541160583496, 'ave_value': -12.373315344922462, 'soft_opc': nan} step=8600




2022-04-20 16:26.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.32 [info     ] FQE_20220420162541: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017112840053647063, 'time_algorithm_update': 0.005019072876420132, 'loss': 0.3552072733683988, 'time_step': 0.005267039980999259, 'init_value': -11.720260620117188, 'ave_value': -12.574728157469869, 'soft_opc': nan} step=8944




2022-04-20 16:26.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.34 [info     ] FQE_20220420162541: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017128503599832224, 'time_algorithm_update': 0.005054789920185887, 'loss': 0.3670666293392694, 'time_step': 0.005302031372868737, 'init_value': -11.606695175170898, 'ave_value': -12.627738977743833, 'soft_opc': nan} step=9288




2022-04-20 16:26.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.36 [info     ] FQE_20220420162541: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017427912978238837, 'time_algorithm_update': 0.005092975705168968, 'loss': 0.38234425054559873, 'time_step': 0.005343326302461846, 'init_value': -11.573722839355469, 'ave_value': -12.805903034637943, 'soft_opc': nan} step=9632




2022-04-20 16:26.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.38 [info     ] FQE_20220420162541: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001699175945548124, 'time_algorithm_update': 0.0046027108680370245, 'loss': 0.39497302148794367, 'time_step': 0.00485080688498741, 'init_value': -11.502562522888184, 'ave_value': -12.757920963229477, 'soft_opc': nan} step=9976




2022-04-20 16:26.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.40 [info     ] FQE_20220420162541: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017113602438638376, 'time_algorithm_update': 0.005079329013824463, 'loss': 0.4076143348762809, 'time_step': 0.005323857069015503, 'init_value': -11.234720230102539, 'ave_value': -12.669800704779725, 'soft_opc': nan} step=10320




2022-04-20 16:26.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.41 [info     ] FQE_20220420162541: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017596954523130904, 'time_algorithm_update': 0.0050508650236351545, 'loss': 0.4248800605730435, 'time_step': 0.005303481290506762, 'init_value': -11.445674896240234, 'ave_value': -12.71707803753062, 'soft_opc': nan} step=10664




2022-04-20 16:26.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.43 [info     ] FQE_20220420162541: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017325545466223427, 'time_algorithm_update': 0.00514241567877836, 'loss': 0.44371041571071673, 'time_step': 0.005394539167714673, 'init_value': -11.252103805541992, 'ave_value': -12.462603385421355, 'soft_opc': nan} step=11008




2022-04-20 16:26.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.45 [info     ] FQE_20220420162541: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016795965128166732, 'time_algorithm_update': 0.004904142645902412, 'loss': 0.46168432877464005, 'time_step': 0.0051475465297698975, 'init_value': -11.53432846069336, 'ave_value': -12.71442995211571, 'soft_opc': nan} step=11352




2022-04-20 16:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.47 [info     ] FQE_20220420162541: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001707617626633755, 'time_algorithm_update': 0.004942792792652928, 'loss': 0.4889212554255717, 'time_step': 0.005192936852920887, 'init_value': -11.85003662109375, 'ave_value': -13.049760694141073, 'soft_opc': nan} step=11696




2022-04-20 16:26.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.49 [info     ] FQE_20220420162541: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017527646796647892, 'time_algorithm_update': 0.005051194235335949, 'loss': 0.507123815805413, 'time_step': 0.0053046387295390286, 'init_value': -11.575246810913086, 'ave_value': -12.879529402464176, 'soft_opc': nan} step=12040




2022-04-20 16:26.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.51 [info     ] FQE_20220420162541: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017347931861877441, 'time_algorithm_update': 0.005029489827710528, 'loss': 0.5290468351828844, 'time_step': 0.005280550829199857, 'init_value': -11.500287055969238, 'ave_value': -12.727024701501135, 'soft_opc': nan} step=12384




2022-04-20 16:26.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.53 [info     ] FQE_20220420162541: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016956135284068973, 'time_algorithm_update': 0.005062047132225924, 'loss': 0.5405407321725993, 'time_step': 0.005308729271556056, 'init_value': -11.956201553344727, 'ave_value': -13.216163052930202, 'soft_opc': nan} step=12728




2022-04-20 16:26.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.55 [info     ] FQE_20220420162541: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017257346663364145, 'time_algorithm_update': 0.004712402820587158, 'loss': 0.5714056178450931, 'time_step': 0.004964363436366237, 'init_value': -12.094402313232422, 'ave_value': -13.085381516136296, 'soft_opc': nan} step=13072




2022-04-20 16:26.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.57 [info     ] FQE_20220420162541: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016990511916404547, 'time_algorithm_update': 0.005096545053082843, 'loss': 0.5816451788143536, 'time_step': 0.00534302550692891, 'init_value': -11.852033615112305, 'ave_value': -12.738145218877255, 'soft_opc': nan} step=13416




2022-04-20 16:26.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:26.59 [info     ] FQE_20220420162541: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001755356788635254, 'time_algorithm_update': 0.005101947590362194, 'loss': 0.6071389234126654, 'time_step': 0.005356818437576294, 'init_value': -12.164536476135254, 'ave_value': -13.029519403989317, 'soft_opc': nan} step=13760




2022-04-20 16:26.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.01 [info     ] FQE_20220420162541: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017066265261450478, 'time_algorithm_update': 0.005054579224697379, 'loss': 0.6382626335158251, 'time_step': 0.0052984135095463245, 'init_value': -12.800546646118164, 'ave_value': -13.497061891001355, 'soft_opc': nan} step=14104




2022-04-20 16:27.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.03 [info     ] FQE_20220420162541: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017528547797092173, 'time_algorithm_update': 0.005035712275394174, 'loss': 0.6626989513332414, 'time_step': 0.005288345176120137, 'init_value': -12.96012020111084, 'ave_value': -13.317218222663326, 'soft_opc': nan} step=14448




2022-04-20 16:27.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.05 [info     ] FQE_20220420162541: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001673545948294706, 'time_algorithm_update': 0.0048354309658671536, 'loss': 0.6735361812982795, 'time_step': 0.00507942327233248, 'init_value': -12.69970417022705, 'ave_value': -12.928316683071321, 'soft_opc': nan} step=14792




2022-04-20 16:27.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.07 [info     ] FQE_20220420162541: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001725651497064635, 'time_algorithm_update': 0.005106149024741594, 'loss': 0.6920479758274417, 'time_step': 0.00535701873690583, 'init_value': -13.096364974975586, 'ave_value': -13.125770060945856, 'soft_opc': nan} step=15136




2022-04-20 16:27.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.09 [info     ] FQE_20220420162541: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017622667689656102, 'time_algorithm_update': 0.005098241013149882, 'loss': 0.7101423516869545, 'time_step': 0.005351018767024196, 'init_value': -13.330221176147461, 'ave_value': -13.269285264584395, 'soft_opc': nan} step=15480




2022-04-20 16:27.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.11 [info     ] FQE_20220420162541: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017028215319611306, 'time_algorithm_update': 0.005154454430868459, 'loss': 0.7177308903314, 'time_step': 0.005400933498559996, 'init_value': -13.611637115478516, 'ave_value': -13.324318101622064, 'soft_opc': nan} step=15824




2022-04-20 16:27.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.13 [info     ] FQE_20220420162541: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016805183055788972, 'time_algorithm_update': 0.00465132192123768, 'loss': 0.7484360604308719, 'time_step': 0.004897689403489579, 'init_value': -13.457511901855469, 'ave_value': -12.965839374746272, 'soft_opc': nan} step=16168




2022-04-20 16:27.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.15 [info     ] FQE_20220420162541: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017095651737479277, 'time_algorithm_update': 0.005070256632427836, 'loss': 0.760748925055702, 'time_step': 0.00532009296639021, 'init_value': -14.343931198120117, 'ave_value': -13.648364248649461, 'soft_opc': nan} step=16512




2022-04-20 16:27.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.17 [info     ] FQE_20220420162541: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017123721366704895, 'time_algorithm_update': 0.005132314770720726, 'loss': 0.7849873874639702, 'time_step': 0.005378929681556169, 'init_value': -14.495317459106445, 'ave_value': -13.646201033627632, 'soft_opc': nan} step=16856




2022-04-20 16:27.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:27.19 [info     ] FQE_20220420162541: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017228653264600178, 'time_algorithm_update': 0.005124867655510126, 'loss': 0.8087534982667759, 'time_step': 0.005375879448513652, 'init_value': -14.61892032623291, 'ave_value': -13.58244631637794, 'soft_opc': nan} step=17200




2022-04-20 16:27.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162541/model_17200.pt
search iteration:  12
using hyper params:  [0.004886075091088962, 0.001761875027719832, 1.6238437914633317e-05, 1]
2022-04-20 16:27.19 [debug    ] RoundIterator is selected.
2022-04-20 16:27.19 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420162719
2022-04-20 16:27.19 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:27.19 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:27.19 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:27.19 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0048860750910

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.23 [info     ] TD3PlusBC_20220420162719: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003321045323422081, 'time_algorithm_update': 0.008499132959466232, 'critic_loss': 0.6327148552177942, 'actor_loss': -0.049079553992079014, 'time_step': 0.00890995745073285, 'td_error': 0.8149977189413349, 'init_value': -0.5012974739074707, 'ave_value': 0.17015636428896974} step=342
2022-04-20 16:27.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.26 [info     ] TD3PlusBC_20220420162719: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00032639224626864605, 'time_algorithm_update': 0.009004993048327707, 'critic_loss': 0.1883844830836469, 'actor_loss': -0.01643585801473138, 'time_step': 0.00940772873616358, 'td_error': 0.8056073835953744, 'init_value': -0.7388749122619629, 'ave_value': 0.2337797404405753} step=684
2022-04-20 16:27.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.30 [info     ] TD3PlusBC_20220420162719: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003246940367403086, 'time_algorithm_update': 0.00897035961262664, 'critic_loss': 0.21854129021889285, 'actor_loss': -0.025446377258900314, 'time_step': 0.009372663776776945, 'td_error': 0.8053857422323331, 'init_value': -1.060887336730957, 'ave_value': 0.2518104197016733} step=1026
2022-04-20 16:27.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.34 [info     ] TD3PlusBC_20220420162719: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003293466846845303, 'time_algorithm_update': 0.008768872210853979, 'critic_loss': 0.260457860992143, 'actor_loss': -0.02768190865192497, 'time_step': 0.00918045657420019, 'td_error': 0.804751977082669, 'init_value': -1.3033396005630493, 'ave_value': 0.3551089197272426} step=1368
2022-04-20 16:27.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.37 [info     ] TD3PlusBC_20220420162719: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00033311397708647435, 'time_algorithm_update': 0.009036317903395982, 'critic_loss': 0.3119245573704006, 'actor_loss': 0.004514867777538578, 'time_step': 0.009451562201070506, 'td_error': 0.8052745108598754, 'init_value': -1.5615994930267334, 'ave_value': 0.4212470674478063} step=1710
2022-04-20 16:27.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.41 [info     ] TD3PlusBC_20220420162719: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00032932367938303807, 'time_algorithm_update': 0.008434043990241157, 'critic_loss': 0.3692706437748775, 'actor_loss': 0.013664087030099846, 'time_step': 0.008843995674311766, 'td_error': 0.8132825945092724, 'init_value': -1.919219970703125, 'ave_value': 0.4783810607018369} step=2052
2022-04-20 16:27.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.45 [info     ] TD3PlusBC_20220420162719: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003298186419302957, 'time_algorithm_update': 0.008903338895206564, 'critic_loss': 0.44642921007777514, 'actor_loss': 0.027413581590554868, 'time_step': 0.009311651625828437, 'td_error': 0.8208722592780405, 'init_value': -2.2335965633392334, 'ave_value': 0.5048106710172703} step=2394
2022-04-20 16:27.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.48 [info     ] TD3PlusBC_20220420162719: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00033001941547059177, 'time_algorithm_update': 0.008872410010176095, 'critic_loss': 0.5046517804526446, 'actor_loss': 0.016992769067921833, 'time_step': 0.009282307318079542, 'td_error': 0.832837344453848, 'init_value': -2.435028076171875, 'ave_value': 0.6424838659207563} step=2736
2022-04-20 16:27.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.51 [info     ] TD3PlusBC_20220420162719: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003233172043025145, 'time_algorithm_update': 0.006737044680188274, 'critic_loss': 0.5582059233160744, 'actor_loss': 0.029019661204159608, 'time_step': 0.0071396939238609625, 'td_error': 0.8467796206488886, 'init_value': -2.780611515045166, 'ave_value': 0.6685705680425305} step=3078
2022-04-20 16:27.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.54 [info     ] TD3PlusBC_20220420162719: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003264354683502376, 'time_algorithm_update': 0.006873206785547803, 'critic_loss': 0.6297561996643655, 'actor_loss': 0.020866166495266018, 'time_step': 0.007279955852798551, 'td_error': 0.8667863500714866, 'init_value': -3.095144748687744, 'ave_value': 0.7009413186898916} step=3420
2022-04-20 16:27.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.57 [info     ] TD3PlusBC_20220420162719: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032293866252341465, 'time_algorithm_update': 0.006871305013957776, 'critic_loss': 0.6963498977323374, 'actor_loss': 0.0395038012436956, 'time_step': 0.0072749713708085625, 'td_error': 0.8887128467710615, 'init_value': -3.4266357421875, 'ave_value': 0.7702435244657183} step=3762
2022-04-20 16:27.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.00 [info     ] TD3PlusBC_20220420162719: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000327955212509423, 'time_algorithm_update': 0.006908889402422989, 'critic_loss': 0.7566659612178105, 'actor_loss': 0.04992753319572984, 'time_step': 0.0073169999652438695, 'td_error': 0.9071616086224491, 'init_value': -3.6738579273223877, 'ave_value': 0.8413682900458105} step=4104
2022-04-20 16:28.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.03 [info     ] TD3PlusBC_20220420162719: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00032279435654132684, 'time_algorithm_update': 0.0068888943097744765, 'critic_loss': 0.826478372583961, 'actor_loss': 0.06345742934367113, 'time_step': 0.007290041934677035, 'td_error': 0.937999976725121, 'init_value': -4.018707752227783, 'ave_value': 0.851288295841843} step=4446
2022-04-20 16:28.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.06 [info     ] TD3PlusBC_20220420162719: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003274233020537081, 'time_algorithm_update': 0.007021130177012661, 'critic_loss': 0.9265763021526281, 'actor_loss': 0.04493567371490406, 'time_step': 0.007428425097326089, 'td_error': 0.9665350229187263, 'init_value': -4.335544586181641, 'ave_value': 0.9056036487062302} step=4788
2022-04-20 16:28.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.09 [info     ] TD3PlusBC_20220420162719: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000328128100835789, 'time_algorithm_update': 0.006883698597288968, 'critic_loss': 0.9808706023365433, 'actor_loss': 0.05045262974692367, 'time_step': 0.007293845477857087, 'td_error': 0.9935041088141536, 'init_value': -4.598555088043213, 'ave_value': 0.979056027984448} step=5130
2022-04-20 16:28.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.11 [info     ] TD3PlusBC_20220420162719: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00029731493944313095, 'time_algorithm_update': 0.006334558565017075, 'critic_loss': 1.051105247908517, 'actor_loss': 0.05353423702525117, 'time_step': 0.00670398675907425, 'td_error': 1.024453785075663, 'init_value': -4.914766788482666, 'ave_value': 1.0692936545339842} step=5472
2022-04-20 16:28.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.14 [info     ] TD3PlusBC_20220420162719: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0002909159799765425, 'time_algorithm_update': 0.006223725993730869, 'critic_loss': 1.1351959046128897, 'actor_loss': 0.06706131155989324, 'time_step': 0.006584826965778195, 'td_error': 1.060015570600839, 'init_value': -5.2322235107421875, 'ave_value': 1.1243237413138756} step=5814
2022-04-20 16:28.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.17 [info     ] TD3PlusBC_20220420162719: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003220142676816349, 'time_algorithm_update': 0.006909815888655813, 'critic_loss': 1.2216031347450458, 'actor_loss': 0.05039269370380898, 'time_step': 0.007313415320993167, 'td_error': 1.0989243007001492, 'init_value': -5.5113067626953125, 'ave_value': 1.1645942651430095} step=6156
2022-04-20 16:28.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.20 [info     ] TD3PlusBC_20220420162719: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003250147167005037, 'time_algorithm_update': 0.006964555260730766, 'critic_loss': 1.3564885988482955, 'actor_loss': 0.05107508689077974, 'time_step': 0.007369189931635271, 'td_error': 1.1281483588006427, 'init_value': -5.86895227432251, 'ave_value': 1.1935977577612324} step=6498
2022-04-20 16:28.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.23 [info     ] TD3PlusBC_20220420162719: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00031549400753445097, 'time_algorithm_update': 0.00669213833167539, 'critic_loss': 1.4545164381900029, 'actor_loss': 0.05715067532153158, 'time_step': 0.007085446028681526, 'td_error': 1.1730829564999714, 'init_value': -6.071278095245361, 'ave_value': 1.3286109916909035} step=6840
2022-04-20 16:28.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.25 [info     ] TD3PlusBC_20220420162719: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0002950255633794773, 'time_algorithm_update': 0.0063772605873687925, 'critic_loss': 1.5808050855558518, 'actor_loss': 0.07302193820737955, 'time_step': 0.00674091096509967, 'td_error': 1.200001762605141, 'init_value': -6.4415788650512695, 'ave_value': 1.319616921515674} step=7182
2022-04-20 16:28.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.28 [info     ] TD3PlusBC_20220420162719: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00029780223355655783, 'time_algorithm_update': 0.006415726845724541, 'critic_loss': 1.7109205938047833, 'actor_loss': 0.06606776563570513, 'time_step': 0.006782950713620548, 'td_error': 1.2427771826155127, 'init_value': -6.8531174659729, 'ave_value': 1.331909132946853} step=7524
2022-04-20 16:28.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.31 [info     ] TD3PlusBC_20220420162719: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003289792969910025, 'time_algorithm_update': 0.006928422297650611, 'critic_loss': 1.8682075933231945, 'actor_loss': 0.06642727845767785, 'time_step': 0.007337805820487396, 'td_error': 1.2925764487400024, 'init_value': -7.081923007965088, 'ave_value': 1.4009905278071055} step=7866
2022-04-20 16:28.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.34 [info     ] TD3PlusBC_20220420162719: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003292407208715963, 'time_algorithm_update': 0.006514980081926312, 'critic_loss': 1.9890510532242514, 'actor_loss': 0.06344450642055238, 'time_step': 0.006925180641531247, 'td_error': 1.3251463219414048, 'init_value': -7.520010471343994, 'ave_value': 1.3931405836837953} step=8208
2022-04-20 16:28.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.37 [info     ] TD3PlusBC_20220420162719: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000325274746320401, 'time_algorithm_update': 0.0069530777066771745, 'critic_loss': 2.1684268130893596, 'actor_loss': 0.07042130749476583, 'time_step': 0.007358278447424459, 'td_error': 1.3849052407611653, 'init_value': -7.720517635345459, 'ave_value': 1.5190957482316036} step=8550
2022-04-20 16:28.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.40 [info     ] TD3PlusBC_20220420162719: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003288405680517007, 'time_algorithm_update': 0.0069679272802252515, 'critic_loss': 2.3784192602711114, 'actor_loss': 0.06748261276567191, 'time_step': 0.0073771232749983585, 'td_error': 1.4139720735520933, 'init_value': -8.169140815734863, 'ave_value': 1.4814903679994347} step=8892
2022-04-20 16:28.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.43 [info     ] TD3PlusBC_20220420162719: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00033078556172331874, 'time_algorithm_update': 0.006987645612125508, 'critic_loss': 2.563641445678577, 'actor_loss': 0.06459586411627413, 'time_step': 0.00740103066316125, 'td_error': 1.4591942546541372, 'init_value': -8.384134292602539, 'ave_value': 1.568903488755092} step=9234
2022-04-20 16:28.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.45 [info     ] TD3PlusBC_20220420162719: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00032785203721788194, 'time_algorithm_update': 0.006959965354517887, 'critic_loss': 2.7584399655897016, 'actor_loss': 0.07221785163757397, 'time_step': 0.007368571577016373, 'td_error': 1.4985192580614322, 'init_value': -8.886207580566406, 'ave_value': 1.5132409770181823} step=9576
2022-04-20 16:28.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.48 [info     ] TD3PlusBC_20220420162719: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003238477204975329, 'time_algorithm_update': 0.006875574937340809, 'critic_loss': 2.940688380895302, 'actor_loss': 0.06136447526732383, 'time_step': 0.0072790997767309, 'td_error': 1.5267633070465927, 'init_value': -9.330053329467773, 'ave_value': 1.6141728460066742} step=9918
2022-04-20 16:28.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.51 [info     ] TD3PlusBC_20220420162719: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000330950781615854, 'time_algorithm_update': 0.006857092617548, 'critic_loss': 3.226849815127445, 'actor_loss': 0.06496772514274943, 'time_step': 0.007268591930991725, 'td_error': 1.582550116224936, 'init_value': -9.562370300292969, 'ave_value': 1.6549417892907252} step=10260
2022-04-20 16:28.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.54 [info     ] TD3PlusBC_20220420162719: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003284982770507099, 'time_algorithm_update': 0.006920568427147224, 'critic_loss': 3.385465650356304, 'actor_loss': 0.08293152720346088, 'time_step': 0.007328962024889495, 'td_error': 1.6348551188170133, 'init_value': -9.929067611694336, 'ave_value': 1.6631911174014948} step=10602
2022-04-20 16:28.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:28.57 [info     ] TD3PlusBC_20220420162719: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.000325372344569156, 'time_algorithm_update': 0.006946526075664319, 'critic_loss': 3.614175536827734, 'actor_loss': 0.07360019064263294, 'time_step': 0.007351664771810609, 'td_error': 1.680958054387041, 'init_value': -10.236000061035156, 'ave_value': 1.769758556031513} step=10944
2022-04-20 16:28.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.00 [info     ] TD3PlusBC_20220420162719: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003235402860139546, 'time_algorithm_update': 0.0068985321368390355, 'critic_loss': 3.8614624888576263, 'actor_loss': 0.06646914100437834, 'time_step': 0.007301910578855994, 'td_error': 1.7177368666283024, 'init_value': -10.448112487792969, 'ave_value': 1.8209133321249875} step=11286
2022-04-20 16:29.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.03 [info     ] TD3PlusBC_20220420162719: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003219933537711874, 'time_algorithm_update': 0.006963428000957645, 'critic_loss': 4.0786495877985365, 'actor_loss': 0.06906941103917813, 'time_step': 0.007365508386266162, 'td_error': 1.767950349881499, 'init_value': -10.895105361938477, 'ave_value': 1.7961101090998666} step=11628
2022-04-20 16:29.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.06 [info     ] TD3PlusBC_20220420162719: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003236476440875851, 'time_algorithm_update': 0.006960073409721866, 'critic_loss': 4.308629858389235, 'actor_loss': 0.07871504086587164, 'time_step': 0.007363803205434342, 'td_error': 1.8092370609905737, 'init_value': -11.168636322021484, 'ave_value': 1.95419532592207} step=11970
2022-04-20 16:29.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.09 [info     ] TD3PlusBC_20220420162719: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003249826487044842, 'time_algorithm_update': 0.00690453442913747, 'critic_loss': 4.565482355174963, 'actor_loss': 0.09174804258764836, 'time_step': 0.007308160352428057, 'td_error': 1.8374523276682497, 'init_value': -11.615182876586914, 'ave_value': 1.940526045716141} step=12312
2022-04-20 16:29.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.12 [info     ] TD3PlusBC_20220420162719: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00032560867175721286, 'time_algorithm_update': 0.006999270957812928, 'critic_loss': 4.774010875245981, 'actor_loss': 0.08729944131963434, 'time_step': 0.0074039781302736515, 'td_error': 1.9141470079944247, 'init_value': -11.84877872467041, 'ave_value': 1.93961452074458} step=12654
2022-04-20 16:29.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.15 [info     ] TD3PlusBC_20220420162719: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003244772292020028, 'time_algorithm_update': 0.006948748527214541, 'critic_loss': 5.073444930607812, 'actor_loss': 0.08598557907586907, 'time_step': 0.007353560269227502, 'td_error': 1.9508950073543887, 'init_value': -12.212575912475586, 'ave_value': 1.9908959051269435} step=12996
2022-04-20 16:29.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.18 [info     ] TD3PlusBC_20220420162719: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003284264726248401, 'time_algorithm_update': 0.006954224486100047, 'critic_loss': 5.301143042525353, 'actor_loss': 0.07333050393744518, 'time_step': 0.007360500201844333, 'td_error': 1.991064969249452, 'init_value': -12.740742683410645, 'ave_value': 1.9261372858483914} step=13338
2022-04-20 16:29.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.21 [info     ] TD3PlusBC_20220420162719: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032761152724773566, 'time_algorithm_update': 0.006934582838538097, 'critic_loss': 5.560332632029962, 'actor_loss': 0.08800015978689431, 'time_step': 0.00734266063623261, 'td_error': 2.0446794250264766, 'init_value': -13.140095710754395, 'ave_value': 1.9333056254786447} step=13680
2022-04-20 16:29.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.24 [info     ] TD3PlusBC_20220420162719: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003293348334686101, 'time_algorithm_update': 0.0069970331693950455, 'critic_loss': 5.8400463667529365, 'actor_loss': 0.0869829752945412, 'time_step': 0.00740610646922686, 'td_error': 2.1186647597018116, 'init_value': -13.236821174621582, 'ave_value': 2.119744384484869} step=14022
2022-04-20 16:29.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.27 [info     ] TD3PlusBC_20220420162719: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003265414321631716, 'time_algorithm_update': 0.006893217215063976, 'critic_loss': 6.118731780010357, 'actor_loss': 0.08563976880838299, 'time_step': 0.007298205331055045, 'td_error': 2.162302880263246, 'init_value': -13.849027633666992, 'ave_value': 2.0316816302312555} step=14364
2022-04-20 16:29.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.29 [info     ] TD3PlusBC_20220420162719: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003255898492378101, 'time_algorithm_update': 0.006979300264726605, 'critic_loss': 6.3680368951189585, 'actor_loss': 0.0910747095245367, 'time_step': 0.007385151428088807, 'td_error': 2.212248775902564, 'init_value': -14.029826164245605, 'ave_value': 2.108726810659492} step=14706
2022-04-20 16:29.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.32 [info     ] TD3PlusBC_20220420162719: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032515832555224324, 'time_algorithm_update': 0.006914331201921429, 'critic_loss': 6.649655783211279, 'actor_loss': 0.07787048483365461, 'time_step': 0.007319913970099555, 'td_error': 2.25055331287425, 'init_value': -14.613850593566895, 'ave_value': 2.0830319856453756} step=15048
2022-04-20 16:29.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.35 [info     ] TD3PlusBC_20220420162719: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032930764538502833, 'time_algorithm_update': 0.006974881852579396, 'critic_loss': 7.0441693144235, 'actor_loss': 0.07605504174852928, 'time_step': 0.007384719207272892, 'td_error': 2.3047852755222804, 'init_value': -14.821779251098633, 'ave_value': 2.1728092945820174} step=15390
2022-04-20 16:29.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.38 [info     ] TD3PlusBC_20220420162719: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003279015334726077, 'time_algorithm_update': 0.006915026938008983, 'critic_loss': 7.204258071748834, 'actor_loss': 0.10821034068688315, 'time_step': 0.007322390874226888, 'td_error': 2.372307524760706, 'init_value': -15.106610298156738, 'ave_value': 2.209606799811108} step=15732
2022-04-20 16:29.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.41 [info     ] TD3PlusBC_20220420162719: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003297837854128832, 'time_algorithm_update': 0.007023549916451438, 'critic_loss': 7.464982882926338, 'actor_loss': 0.10238902608466427, 'time_step': 0.0074336012901618466, 'td_error': 2.433511657226337, 'init_value': -15.588923454284668, 'ave_value': 2.1712009398765897} step=16074
2022-04-20 16:29.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.44 [info     ] TD3PlusBC_20220420162719: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003297036154228344, 'time_algorithm_update': 0.006985216809992204, 'critic_loss': 7.7935438107328805, 'actor_loss': 0.08773024345350544, 'time_step': 0.007396297148096631, 'td_error': 2.48413372987085, 'init_value': -15.880002975463867, 'ave_value': 2.280480248241828} step=16416
2022-04-20 16:29.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.47 [info     ] TD3PlusBC_20220420162719: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.000328100215621859, 'time_algorithm_update': 0.00699784393199006, 'critic_loss': 8.110600146982405, 'actor_loss': 0.09109648495738269, 'time_step': 0.0074070880287571954, 'td_error': 2.532299757278213, 'init_value': -16.244020462036133, 'ave_value': 2.2491196905058466} step=16758
2022-04-20 16:29.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:29.50 [info     ] TD3PlusBC_20220420162719: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003347870899222748, 'time_algorithm_update': 0.007041491960224353, 'critic_loss': 8.391551835843694, 'actor_loss': 0.09287658911089451, 'time_step': 0.007453744174444187, 'td_error': 2.598581281060356, 'init_value': -16.777177810668945, 'ave_value': 2.1895726009683165} step=17100
2022-04-20 16:29.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420162719/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:29.51 [info     ] FQE_20220420162950: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015438177499426417, 'time_algorithm_update': 0.0034902612847017953, 'loss': 0.007844703992918492, 'time_step': 0.003717454082994576, 'init_value': -0.2710482180118561, 'ave_value': -0.24743019902726282, 'soft_opc': nan} step=166




2022-04-20 16:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.52 [info     ] FQE_20220420162950: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015493329749049912, 'time_algorithm_update': 0.0035688676029802806, 'loss': 0.005958676795441523, 'time_step': 0.0037943368934723267, 'init_value': -0.41240668296813965, 'ave_value': -0.32921207301645866, 'soft_opc': nan} step=332




2022-04-20 16:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.52 [info     ] FQE_20220420162950: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016530163316841586, 'time_algorithm_update': 0.0034470156014683737, 'loss': 0.0052008026795663746, 'time_step': 0.0036813408495431923, 'init_value': -0.469624400138855, 'ave_value': -0.3551865136436489, 'soft_opc': nan} step=498




2022-04-20 16:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.53 [info     ] FQE_20220420162950: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015868336321359658, 'time_algorithm_update': 0.0036087897886712865, 'loss': 0.004888839520379363, 'time_step': 0.0038418123521000505, 'init_value': -0.5342185497283936, 'ave_value': -0.3790372803616379, 'soft_opc': nan} step=664




2022-04-20 16:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.54 [info     ] FQE_20220420162950: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015816056584737388, 'time_algorithm_update': 0.0035388828760170073, 'loss': 0.004279245333521661, 'time_step': 0.0037713467356670334, 'init_value': -0.5825687646865845, 'ave_value': -0.41078395675280477, 'soft_opc': nan} step=830




2022-04-20 16:29.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.54 [info     ] FQE_20220420162950: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015871352460010942, 'time_algorithm_update': 0.003578223377825266, 'loss': 0.00385241471904229, 'time_step': 0.0038065177848540157, 'init_value': -0.6405665874481201, 'ave_value': -0.445754118529484, 'soft_opc': nan} step=996




2022-04-20 16:29.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.55 [info     ] FQE_20220420162950: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015953075454895756, 'time_algorithm_update': 0.0038567652185279204, 'loss': 0.003793416101575257, 'time_step': 0.00408832136406956, 'init_value': -0.7051374316215515, 'ave_value': -0.4798361984341141, 'soft_opc': nan} step=1162




2022-04-20 16:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.56 [info     ] FQE_20220420162950: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001651608800313559, 'time_algorithm_update': 0.004962363875055888, 'loss': 0.0036388543814544127, 'time_step': 0.005200585687016866, 'init_value': -0.8119471073150635, 'ave_value': -0.5739804903092492, 'soft_opc': nan} step=1328




2022-04-20 16:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.57 [info     ] FQE_20220420162950: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016277238547083843, 'time_algorithm_update': 0.004996834031070571, 'loss': 0.00351479642530791, 'time_step': 0.005231081721294357, 'init_value': -0.8472046852111816, 'ave_value': -0.5810433016296556, 'soft_opc': nan} step=1494




2022-04-20 16:29.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.58 [info     ] FQE_20220420162950: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016802477549357587, 'time_algorithm_update': 0.005095667149647173, 'loss': 0.0037117340638448135, 'time_step': 0.005340726978807564, 'init_value': -0.9086819887161255, 'ave_value': -0.620624296182582, 'soft_opc': nan} step=1660




2022-04-20 16:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:29.59 [info     ] FQE_20220420162950: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016018712376973717, 'time_algorithm_update': 0.005084968474974115, 'loss': 0.003567263616546315, 'time_step': 0.005323342530124159, 'init_value': -1.0035099983215332, 'ave_value': -0.6802666292782257, 'soft_opc': nan} step=1826




2022-04-20 16:29.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.00 [info     ] FQE_20220420162950: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001611379255731422, 'time_algorithm_update': 0.005045638026961361, 'loss': 0.0036616622749435523, 'time_step': 0.005278524146022567, 'init_value': -1.0464553833007812, 'ave_value': -0.7124805732403655, 'soft_opc': nan} step=1992




2022-04-20 16:30.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.01 [info     ] FQE_20220420162950: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016400612980486398, 'time_algorithm_update': 0.005114074212959014, 'loss': 0.00399582570976948, 'time_step': 0.005349860133894955, 'init_value': -1.1224985122680664, 'ave_value': -0.7721714557526079, 'soft_opc': nan} step=2158




2022-04-20 16:30.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.02 [info     ] FQE_20220420162950: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016308836190097304, 'time_algorithm_update': 0.005006668079330261, 'loss': 0.004144047892579517, 'time_step': 0.005244614130043122, 'init_value': -1.229873776435852, 'ave_value': -0.8526996912954895, 'soft_opc': nan} step=2324




2022-04-20 16:30.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.03 [info     ] FQE_20220420162950: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001606811960059476, 'time_algorithm_update': 0.0050330133323209835, 'loss': 0.004325769613749426, 'time_step': 0.005266962281192641, 'init_value': -1.2924331426620483, 'ave_value': -0.8808455391191282, 'soft_opc': nan} step=2490




2022-04-20 16:30.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.04 [info     ] FQE_20220420162950: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016228118574762918, 'time_algorithm_update': 0.004246290907802352, 'loss': 0.004796099523643413, 'time_step': 0.00448167180440512, 'init_value': -1.3572580814361572, 'ave_value': -0.9229560809176381, 'soft_opc': nan} step=2656




2022-04-20 16:30.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.05 [info     ] FQE_20220420162950: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016408943268189947, 'time_algorithm_update': 0.00501316857625203, 'loss': 0.0045853003982948256, 'time_step': 0.00525374154010451, 'init_value': -1.4219799041748047, 'ave_value': -0.9594347496978469, 'soft_opc': nan} step=2822




2022-04-20 16:30.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.06 [info     ] FQE_20220420162950: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016799605036356362, 'time_algorithm_update': 0.0051454420549323756, 'loss': 0.004892456316506961, 'time_step': 0.005390188780175634, 'init_value': -1.4837684631347656, 'ave_value': -0.9887993252508946, 'soft_opc': nan} step=2988




2022-04-20 16:30.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.07 [info     ] FQE_20220420162950: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016562622713755412, 'time_algorithm_update': 0.005037877933088556, 'loss': 0.00531248214334833, 'time_step': 0.0052783417414469895, 'init_value': -1.6006935834884644, 'ave_value': -1.0671714575206106, 'soft_opc': nan} step=3154




2022-04-20 16:30.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.08 [info     ] FQE_20220420162950: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016176987843341138, 'time_algorithm_update': 0.005038900547716991, 'loss': 0.005546810688922204, 'time_step': 0.005269722766186817, 'init_value': -1.6570796966552734, 'ave_value': -1.102870098670928, 'soft_opc': nan} step=3320




2022-04-20 16:30.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.09 [info     ] FQE_20220420162950: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016542084245796664, 'time_algorithm_update': 0.004919908132897802, 'loss': 0.005738265342527933, 'time_step': 0.005159622215362917, 'init_value': -1.6960890293121338, 'ave_value': -1.106232101436671, 'soft_opc': nan} step=3486




2022-04-20 16:30.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.10 [info     ] FQE_20220420162950: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016445855060255672, 'time_algorithm_update': 0.005181436079094209, 'loss': 0.006131497266806146, 'time_step': 0.0054173196654721915, 'init_value': -1.7528300285339355, 'ave_value': -1.1408445415733097, 'soft_opc': nan} step=3652




2022-04-20 16:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.10 [info     ] FQE_20220420162950: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001626215785382742, 'time_algorithm_update': 0.005081501351781638, 'loss': 0.006306138144194496, 'time_step': 0.005319820829184659, 'init_value': -1.8210276365280151, 'ave_value': -1.1720958926723346, 'soft_opc': nan} step=3818




2022-04-20 16:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.11 [info     ] FQE_20220420162950: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016491097140024943, 'time_algorithm_update': 0.005113718021346862, 'loss': 0.006944274998982493, 'time_step': 0.005354461899722915, 'init_value': -1.8725969791412354, 'ave_value': -1.222479970728022, 'soft_opc': nan} step=3984




2022-04-20 16:30.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.12 [info     ] FQE_20220420162950: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016602981521422603, 'time_algorithm_update': 0.004843365715210696, 'loss': 0.006990769844367562, 'time_step': 0.005084198641489787, 'init_value': -1.9685900211334229, 'ave_value': -1.279626642731396, 'soft_opc': nan} step=4150




2022-04-20 16:30.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.13 [info     ] FQE_20220420162950: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016196377306099398, 'time_algorithm_update': 0.004681176449879107, 'loss': 0.007416822226628302, 'time_step': 0.004916201154869723, 'init_value': -2.0603673458099365, 'ave_value': -1.3598077712865833, 'soft_opc': nan} step=4316




2022-04-20 16:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.14 [info     ] FQE_20220420162950: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.000166144715734275, 'time_algorithm_update': 0.004990004631410162, 'loss': 0.008074739330773613, 'time_step': 0.005233119769268726, 'init_value': -2.146355152130127, 'ave_value': -1.4142048482455918, 'soft_opc': nan} step=4482




2022-04-20 16:30.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.15 [info     ] FQE_20220420162950: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016462084758712584, 'time_algorithm_update': 0.0050779265093516155, 'loss': 0.008267285418346053, 'time_step': 0.005319109882216856, 'init_value': -2.291177272796631, 'ave_value': -1.5206268221676886, 'soft_opc': nan} step=4648




2022-04-20 16:30.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.16 [info     ] FQE_20220420162950: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016258136335625705, 'time_algorithm_update': 0.00499115076409765, 'loss': 0.008492244825507683, 'time_step': 0.005224255194146949, 'init_value': -2.321859359741211, 'ave_value': -1.5299072715546096, 'soft_opc': nan} step=4814




2022-04-20 16:30.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.17 [info     ] FQE_20220420162950: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016508332218032284, 'time_algorithm_update': 0.005086608679897814, 'loss': 0.00929603093452415, 'time_step': 0.005326915936297681, 'init_value': -2.3970370292663574, 'ave_value': -1.5905925958945943, 'soft_opc': nan} step=4980




2022-04-20 16:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.18 [info     ] FQE_20220420162950: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016740862145481338, 'time_algorithm_update': 0.005074126174650997, 'loss': 0.009709544666391989, 'time_step': 0.0053169124097709195, 'init_value': -2.53429913520813, 'ave_value': -1.6905383235430933, 'soft_opc': nan} step=5146




2022-04-20 16:30.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.19 [info     ] FQE_20220420162950: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016579857791762753, 'time_algorithm_update': 0.005035555506327066, 'loss': 0.009730677860256016, 'time_step': 0.0052762936396771164, 'init_value': -2.59702730178833, 'ave_value': -1.723425606544222, 'soft_opc': nan} step=5312




2022-04-20 16:30.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.20 [info     ] FQE_20220420162950: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016410810401640743, 'time_algorithm_update': 0.005033557673534715, 'loss': 0.010597820670753494, 'time_step': 0.005274433687508824, 'init_value': -2.6988751888275146, 'ave_value': -1.8069406676534059, 'soft_opc': nan} step=5478




2022-04-20 16:30.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.21 [info     ] FQE_20220420162950: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016216915774058146, 'time_algorithm_update': 0.005043660301760018, 'loss': 0.011245446148151756, 'time_step': 0.00528320346970156, 'init_value': -2.7854692935943604, 'ave_value': -1.8446773507170848, 'soft_opc': nan} step=5644




2022-04-20 16:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.22 [info     ] FQE_20220420162950: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015991567129112152, 'time_algorithm_update': 0.004031658172607422, 'loss': 0.011750102322969407, 'time_step': 0.004268370478986257, 'init_value': -2.833266496658325, 'ave_value': -1.868301177843734, 'soft_opc': nan} step=5810




2022-04-20 16:30.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.23 [info     ] FQE_20220420162950: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016310272446597916, 'time_algorithm_update': 0.005037692655999976, 'loss': 0.010780295173017622, 'time_step': 0.00527536150920822, 'init_value': -2.8164994716644287, 'ave_value': -1.8508224795933241, 'soft_opc': nan} step=5976




2022-04-20 16:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.24 [info     ] FQE_20220420162950: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016702801348215127, 'time_algorithm_update': 0.00513740045478545, 'loss': 0.011667383870485812, 'time_step': 0.005376840212258948, 'init_value': -2.8994407653808594, 'ave_value': -1.9025344296014524, 'soft_opc': nan} step=6142




2022-04-20 16:30.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.25 [info     ] FQE_20220420162950: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016418278935443923, 'time_algorithm_update': 0.005128280226006566, 'loss': 0.012276681460557977, 'time_step': 0.00536419253751456, 'init_value': -2.940855026245117, 'ave_value': -1.9006596681703856, 'soft_opc': nan} step=6308




2022-04-20 16:30.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.26 [info     ] FQE_20220420162950: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016362408557570125, 'time_algorithm_update': 0.0050130019704979585, 'loss': 0.012618362680582487, 'time_step': 0.005249105304120535, 'init_value': -3.0102221965789795, 'ave_value': -1.947035934870039, 'soft_opc': nan} step=6474




2022-04-20 16:30.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.27 [info     ] FQE_20220420162950: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001661576420427805, 'time_algorithm_update': 0.005143913878015725, 'loss': 0.012445434174747262, 'time_step': 0.005384956497743905, 'init_value': -3.110973834991455, 'ave_value': -2.0424866619477937, 'soft_opc': nan} step=6640




2022-04-20 16:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.28 [info     ] FQE_20220420162950: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016814111227012542, 'time_algorithm_update': 0.005115418549043587, 'loss': 0.013137138114838177, 'time_step': 0.005359533321426575, 'init_value': -3.096334934234619, 'ave_value': -2.0299592816010787, 'soft_opc': nan} step=6806




2022-04-20 16:30.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.29 [info     ] FQE_20220420162950: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016327507524605258, 'time_algorithm_update': 0.005029036338070789, 'loss': 0.013280961229830576, 'time_step': 0.005266996751348656, 'init_value': -3.162813425064087, 'ave_value': -2.0505849959479794, 'soft_opc': nan} step=6972




2022-04-20 16:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.30 [info     ] FQE_20220420162950: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016533466706792992, 'time_algorithm_update': 0.005119671304541898, 'loss': 0.013741671680669155, 'time_step': 0.005358282342014542, 'init_value': -3.215423822402954, 'ave_value': -2.075823940017873, 'soft_opc': nan} step=7138




2022-04-20 16:30.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.30 [info     ] FQE_20220420162950: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001626833375678005, 'time_algorithm_update': 0.004151834062783115, 'loss': 0.013706128669100398, 'time_step': 0.004383566867874329, 'init_value': -3.331622362136841, 'ave_value': -2.1634215483928587, 'soft_opc': nan} step=7304




2022-04-20 16:30.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.31 [info     ] FQE_20220420162950: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016644202082990165, 'time_algorithm_update': 0.004946580852370664, 'loss': 0.014337506565315851, 'time_step': 0.005191986819347703, 'init_value': -3.400191307067871, 'ave_value': -2.2074290283695532, 'soft_opc': nan} step=7470




2022-04-20 16:30.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.32 [info     ] FQE_20220420162950: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001657569264791098, 'time_algorithm_update': 0.0050059068633849365, 'loss': 0.01501568116202771, 'time_step': 0.0052450464432498055, 'init_value': -3.492809772491455, 'ave_value': -2.2808479271493516, 'soft_opc': nan} step=7636




2022-04-20 16:30.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.33 [info     ] FQE_20220420162950: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016356232654617494, 'time_algorithm_update': 0.005031035607119641, 'loss': 0.01513274598387014, 'time_step': 0.0052677996187324985, 'init_value': -3.479243278503418, 'ave_value': -2.2786810922159537, 'soft_opc': nan} step=7802




2022-04-20 16:30.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.34 [info     ] FQE_20220420162950: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016852890152529063, 'time_algorithm_update': 0.0050536609557737784, 'loss': 0.0157108215982633, 'time_step': 0.005297950951449843, 'init_value': -3.6061339378356934, 'ave_value': -2.3629703005020684, 'soft_opc': nan} step=7968




2022-04-20 16:30.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.35 [info     ] FQE_20220420162950: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016589480710316854, 'time_algorithm_update': 0.005071556711771402, 'loss': 0.0160556273438115, 'time_step': 0.005311845296836761, 'init_value': -3.7098960876464844, 'ave_value': -2.4870179218986816, 'soft_opc': nan} step=8134




2022-04-20 16:30.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:30.36 [info     ] FQE_20220420162950: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016692747552710842, 'time_algorithm_update': 0.005123004855879818, 'loss': 0.016163054890846497, 'time_step': 0.00536332503858819, 'init_value': -3.7701079845428467, 'ave_value': -2.5030175040467633, 'soft_opc': nan} step=8300




2022-04-20 16:30.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162950/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:30.37 [debug    ] RoundIterator is selected.
2022-04-20 16:30.37 [info     ] Directory is created at d3rlpy_logs/FQE_20220420163037
2022-04-20 16:30.37 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:30.37 [debug    ] Building models...
2022-04-20 16:30.37 [debug    ] Models have been built.
2022-04-20 16:30.37 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420163037/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:30.39 [info     ] FQE_20220420163037: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.000172595645106116, 'time_algorithm_update': 0.005044187917265781, 'loss': 0.030240106044423787, 'time_step': 0.005291022533594176, 'init_value': -1.0023536682128906, 'ave_value': -0.985612016943124, 'soft_opc': nan} step=344




2022-04-20 16:30.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.41 [info     ] FQE_20220420163037: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017039027324942656, 'time_algorithm_update': 0.004814120919205422, 'loss': 0.025006546728247994, 'time_step': 0.005057863024778144, 'init_value': -1.6422122716903687, 'ave_value': -1.6110262437446696, 'soft_opc': nan} step=688




2022-04-20 16:30.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.43 [info     ] FQE_20220420163037: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001690727333689845, 'time_algorithm_update': 0.005067274320957272, 'loss': 0.029878934815547667, 'time_step': 0.005307092916133792, 'init_value': -2.3529751300811768, 'ave_value': -2.322368726322243, 'soft_opc': nan} step=1032




2022-04-20 16:30.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.45 [info     ] FQE_20220420163037: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017571865126144056, 'time_algorithm_update': 0.005059067593064419, 'loss': 0.033237108765819735, 'time_step': 0.005310767611791921, 'init_value': -2.7318973541259766, 'ave_value': -2.72704624376587, 'soft_opc': nan} step=1376




2022-04-20 16:30.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.46 [info     ] FQE_20220420163037: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017038819401763206, 'time_algorithm_update': 0.004958506933478422, 'loss': 0.04277224839816606, 'time_step': 0.005202267752137295, 'init_value': -3.1824533939361572, 'ave_value': -3.197713221157356, 'soft_opc': nan} step=1720




2022-04-20 16:30.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.48 [info     ] FQE_20220420163037: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017386813496434413, 'time_algorithm_update': 0.004598124775775644, 'loss': 0.05402414691309596, 'time_step': 0.004848878744036653, 'init_value': -3.449622631072998, 'ave_value': -3.5156574530719973, 'soft_opc': nan} step=2064




2022-04-20 16:30.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.50 [info     ] FQE_20220420163037: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001704166101854901, 'time_algorithm_update': 0.005087734654892323, 'loss': 0.06767322124945822, 'time_step': 0.005333654409231141, 'init_value': -3.787230968475342, 'ave_value': -3.8809552941085386, 'soft_opc': nan} step=2408




2022-04-20 16:30.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.52 [info     ] FQE_20220420163037: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001738882342050242, 'time_algorithm_update': 0.0050644832988118015, 'loss': 0.08805620821658522, 'time_step': 0.005314354286637417, 'init_value': -4.076708793640137, 'ave_value': -4.202182228734868, 'soft_opc': nan} step=2752




2022-04-20 16:30.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.54 [info     ] FQE_20220420163037: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017261505126953125, 'time_algorithm_update': 0.004997834216716678, 'loss': 0.10897446196623753, 'time_step': 0.005245573298875676, 'init_value': -4.238123893737793, 'ave_value': -4.40908276304804, 'soft_opc': nan} step=3096




2022-04-20 16:30.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.56 [info     ] FQE_20220420163037: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017382932263751362, 'time_algorithm_update': 0.005056163599324781, 'loss': 0.13421144710073984, 'time_step': 0.005307738864144614, 'init_value': -4.677271842956543, 'ave_value': -4.892215652983669, 'soft_opc': nan} step=3440




2022-04-20 16:30.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:30.58 [info     ] FQE_20220420163037: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017098909200623978, 'time_algorithm_update': 0.004525287899860116, 'loss': 0.16715380113550224, 'time_step': 0.004771629045175952, 'init_value': -4.807441711425781, 'ave_value': -5.119262433293703, 'soft_opc': nan} step=3784




2022-04-20 16:30.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.00 [info     ] FQE_20220420163037: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017685807028482126, 'time_algorithm_update': 0.005099121914353482, 'loss': 0.19238831308605367, 'time_step': 0.005351968282877013, 'init_value': -4.92929220199585, 'ave_value': -5.338566225569964, 'soft_opc': nan} step=4128




2022-04-20 16:31.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.02 [info     ] FQE_20220420163037: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017164682233056358, 'time_algorithm_update': 0.005102153434309848, 'loss': 0.22424332004278724, 'time_step': 0.005350270936655444, 'init_value': -4.912151336669922, 'ave_value': -5.439292339488931, 'soft_opc': nan} step=4472




2022-04-20 16:31.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.04 [info     ] FQE_20220420163037: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001700991807981979, 'time_algorithm_update': 0.005111873842949091, 'loss': 0.2573178448757633, 'time_step': 0.005357801914215088, 'init_value': -5.149682521820068, 'ave_value': -5.803829053421882, 'soft_opc': nan} step=4816




2022-04-20 16:31.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.06 [info     ] FQE_20220420163037: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017149434533230093, 'time_algorithm_update': 0.004654444234315739, 'loss': 0.2863507087743126, 'time_step': 0.004901082016700922, 'init_value': -5.181779384613037, 'ave_value': -6.025557512216069, 'soft_opc': nan} step=5160




2022-04-20 16:31.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.08 [info     ] FQE_20220420163037: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017215692719747854, 'time_algorithm_update': 0.005121127810589102, 'loss': 0.3229388596955687, 'time_step': 0.005368202231651129, 'init_value': -5.284749984741211, 'ave_value': -6.2514546833496105, 'soft_opc': nan} step=5504




2022-04-20 16:31.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.10 [info     ] FQE_20220420163037: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017431309056836506, 'time_algorithm_update': 0.005086939002192298, 'loss': 0.3586159447413804, 'time_step': 0.0053379459436549695, 'init_value': -5.212605953216553, 'ave_value': -6.364690282325856, 'soft_opc': nan} step=5848




2022-04-20 16:31.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.12 [info     ] FQE_20220420163037: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001724272273307623, 'time_algorithm_update': 0.005020633686420529, 'loss': 0.39009220033429215, 'time_step': 0.005267827316772106, 'init_value': -5.149827003479004, 'ave_value': -6.461683096827285, 'soft_opc': nan} step=6192




2022-04-20 16:31.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.14 [info     ] FQE_20220420163037: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017618647841520087, 'time_algorithm_update': 0.00506173663361128, 'loss': 0.4214765044189123, 'time_step': 0.0053155727164689885, 'init_value': -4.839949131011963, 'ave_value': -6.36754818927698, 'soft_opc': nan} step=6536




2022-04-20 16:31.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.16 [info     ] FQE_20220420163037: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001725124758343364, 'time_algorithm_update': 0.004538640033367069, 'loss': 0.45380661427584845, 'time_step': 0.004787858142409214, 'init_value': -5.105384349822998, 'ave_value': -6.740646069647056, 'soft_opc': nan} step=6880




2022-04-20 16:31.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.18 [info     ] FQE_20220420163037: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001775539198587107, 'time_algorithm_update': 0.005097780809846035, 'loss': 0.48456253680985334, 'time_step': 0.005353065424187239, 'init_value': -5.134442329406738, 'ave_value': -6.827712649211019, 'soft_opc': nan} step=7224




2022-04-20 16:31.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.20 [info     ] FQE_20220420163037: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017605202142582384, 'time_algorithm_update': 0.005076378583908081, 'loss': 0.5187593569307652, 'time_step': 0.005330823881681575, 'init_value': -5.512061595916748, 'ave_value': -7.276997554104205, 'soft_opc': nan} step=7568




2022-04-20 16:31.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.22 [info     ] FQE_20220420163037: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017619687457417333, 'time_algorithm_update': 0.005090552013973857, 'loss': 0.5437420506983303, 'time_step': 0.005345774944438491, 'init_value': -5.5956268310546875, 'ave_value': -7.35536096759711, 'soft_opc': nan} step=7912




2022-04-20 16:31.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.23 [info     ] FQE_20220420163037: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017052542331606844, 'time_algorithm_update': 0.0047250369260477465, 'loss': 0.5665993369725901, 'time_step': 0.004971612331479094, 'init_value': -5.514150142669678, 'ave_value': -7.291269131890825, 'soft_opc': nan} step=8256




2022-04-20 16:31.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.25 [info     ] FQE_20220420163037: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017337397087452023, 'time_algorithm_update': 0.005063744478447493, 'loss': 0.5945910234437433, 'time_step': 0.005313273779181547, 'init_value': -5.967480659484863, 'ave_value': -7.752083446447914, 'soft_opc': nan} step=8600




2022-04-20 16:31.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.27 [info     ] FQE_20220420163037: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017529587412989415, 'time_algorithm_update': 0.005155787911525992, 'loss': 0.6120273927984716, 'time_step': 0.005405766326327657, 'init_value': -6.3241472244262695, 'ave_value': -8.026768549523307, 'soft_opc': nan} step=8944




2022-04-20 16:31.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.29 [info     ] FQE_20220420163037: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001757539982019469, 'time_algorithm_update': 0.005087203757707463, 'loss': 0.6241212465995273, 'time_step': 0.005341847275578698, 'init_value': -6.580965995788574, 'ave_value': -8.137590130223884, 'soft_opc': nan} step=9288




2022-04-20 16:31.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.31 [info     ] FQE_20220420163037: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017324228619420253, 'time_algorithm_update': 0.005053752383520437, 'loss': 0.6326360242376321, 'time_step': 0.005304261695506961, 'init_value': -7.023336410522461, 'ave_value': -8.380698174655203, 'soft_opc': nan} step=9632




2022-04-20 16:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.33 [info     ] FQE_20220420163037: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001691302587819654, 'time_algorithm_update': 0.004606845073921736, 'loss': 0.6322189244051832, 'time_step': 0.004850754904192548, 'init_value': -7.394778251647949, 'ave_value': -8.609551292119196, 'soft_opc': nan} step=9976




2022-04-20 16:31.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.35 [info     ] FQE_20220420163037: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017168355542559956, 'time_algorithm_update': 0.005077490279840869, 'loss': 0.6506100330328526, 'time_step': 0.0053245875724526335, 'init_value': -7.446341514587402, 'ave_value': -8.424468288976733, 'soft_opc': nan} step=10320




2022-04-20 16:31.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.37 [info     ] FQE_20220420163037: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001696985821391261, 'time_algorithm_update': 0.005065851433332576, 'loss': 0.64326377022405, 'time_step': 0.005314147056535233, 'init_value': -7.75276517868042, 'ave_value': -8.567443918429214, 'soft_opc': nan} step=10664




2022-04-20 16:31.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.39 [info     ] FQE_20220420163037: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017763362374416617, 'time_algorithm_update': 0.005064475674961888, 'loss': 0.6277762361867136, 'time_step': 0.005319249491358913, 'init_value': -8.096761703491211, 'ave_value': -8.722668277529312, 'soft_opc': nan} step=11008




2022-04-20 16:31.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.41 [info     ] FQE_20220420163037: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016942897508310717, 'time_algorithm_update': 0.004880532275798709, 'loss': 0.6166423892948864, 'time_step': 0.005126753518747729, 'init_value': -8.18232536315918, 'ave_value': -8.699994457951906, 'soft_opc': nan} step=11352




2022-04-20 16:31.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.43 [info     ] FQE_20220420163037: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017140840375146201, 'time_algorithm_update': 0.004873254271440728, 'loss': 0.6070472004411872, 'time_step': 0.00511946442515351, 'init_value': -8.141794204711914, 'ave_value': -8.584706449434833, 'soft_opc': nan} step=11696




2022-04-20 16:31.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.45 [info     ] FQE_20220420163037: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017488141392552577, 'time_algorithm_update': 0.005097729522128438, 'loss': 0.6025533405847328, 'time_step': 0.005349423303160556, 'init_value': -8.25808334350586, 'ave_value': -8.587841073578005, 'soft_opc': nan} step=12040




2022-04-20 16:31.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.47 [info     ] FQE_20220420163037: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017439348753108534, 'time_algorithm_update': 0.005114574071972869, 'loss': 0.5888131281231032, 'time_step': 0.005365831214328145, 'init_value': -8.52574348449707, 'ave_value': -8.698256173162713, 'soft_opc': nan} step=12384




2022-04-20 16:31.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.49 [info     ] FQE_20220420163037: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017472062000008516, 'time_algorithm_update': 0.0050256605758223424, 'loss': 0.5805148504197944, 'time_step': 0.005277782678604126, 'init_value': -8.498443603515625, 'ave_value': -8.605103674364855, 'soft_opc': nan} step=12728




2022-04-20 16:31.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.51 [info     ] FQE_20220420163037: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016851480617079625, 'time_algorithm_update': 0.0046080101368039155, 'loss': 0.5786702207636175, 'time_step': 0.004851039065871128, 'init_value': -8.740802764892578, 'ave_value': -8.798527955873828, 'soft_opc': nan} step=13072




2022-04-20 16:31.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.53 [info     ] FQE_20220420163037: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017269198284592738, 'time_algorithm_update': 0.005129021267558253, 'loss': 0.5791781764737395, 'time_step': 0.005378584529078284, 'init_value': -8.893732070922852, 'ave_value': -8.83625830352172, 'soft_opc': nan} step=13416




2022-04-20 16:31.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.55 [info     ] FQE_20220420163037: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017261920973312024, 'time_algorithm_update': 0.005040743323259576, 'loss': 0.5779512760467654, 'time_step': 0.00528957677441974, 'init_value': -8.909093856811523, 'ave_value': -8.833044425310852, 'soft_opc': nan} step=13760




2022-04-20 16:31.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.57 [info     ] FQE_20220420163037: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017424447591914686, 'time_algorithm_update': 0.005163126213606014, 'loss': 0.58555591768087, 'time_step': 0.005415488814198693, 'init_value': -8.879007339477539, 'ave_value': -8.91466005297193, 'soft_opc': nan} step=14104




2022-04-20 16:31.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:31.59 [info     ] FQE_20220420163037: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017603469449420307, 'time_algorithm_update': 0.0049535548964212105, 'loss': 0.5917848975744185, 'time_step': 0.005206111558648043, 'init_value': -9.393534660339355, 'ave_value': -9.514652855816792, 'soft_opc': nan} step=14448




2022-04-20 16:31.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.00 [info     ] FQE_20220420163037: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017409269199814906, 'time_algorithm_update': 0.004822962505872859, 'loss': 0.6081145342092874, 'time_step': 0.005072864682175392, 'init_value': -9.702577590942383, 'ave_value': -9.696334519626589, 'soft_opc': nan} step=14792




2022-04-20 16:32.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.02 [info     ] FQE_20220420163037: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017443784447603448, 'time_algorithm_update': 0.005079149506812872, 'loss': 0.6127021338270847, 'time_step': 0.0053306561569834866, 'init_value': -9.772649765014648, 'ave_value': -9.819469270786389, 'soft_opc': nan} step=15136




2022-04-20 16:32.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.04 [info     ] FQE_20220420163037: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001740448696668758, 'time_algorithm_update': 0.005114431991133579, 'loss': 0.6113822781480849, 'time_step': 0.005365965671317522, 'init_value': -10.031767845153809, 'ave_value': -10.183833368538735, 'soft_opc': nan} step=15480




2022-04-20 16:32.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.06 [info     ] FQE_20220420163037: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017441705215808957, 'time_algorithm_update': 0.005080113577288251, 'loss': 0.6268371722747593, 'time_step': 0.005332097757694333, 'init_value': -10.394573211669922, 'ave_value': -10.770428278015272, 'soft_opc': nan} step=15824




2022-04-20 16:32.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.08 [info     ] FQE_20220420163037: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017028423242790755, 'time_algorithm_update': 0.0046289223571156345, 'loss': 0.6236142574722857, 'time_step': 0.0048757243988125825, 'init_value': -10.582094192504883, 'ave_value': -11.216025537527628, 'soft_opc': nan} step=16168




2022-04-20 16:32.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.10 [info     ] FQE_20220420163037: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017285624215769213, 'time_algorithm_update': 0.00512695173884547, 'loss': 0.6299458863971712, 'time_step': 0.005375414393668951, 'init_value': -10.340483665466309, 'ave_value': -11.29096570496053, 'soft_opc': nan} step=16512




2022-04-20 16:32.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.12 [info     ] FQE_20220420163037: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017457923223805982, 'time_algorithm_update': 0.004983558211215707, 'loss': 0.6442646967290446, 'time_step': 0.005235822394836781, 'init_value': -10.547676086425781, 'ave_value': -11.651505940978055, 'soft_opc': nan} step=16856




2022-04-20 16:32.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:32.14 [info     ] FQE_20220420163037: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017520785331726074, 'time_algorithm_update': 0.005110997793286345, 'loss': 0.6511847138253235, 'time_step': 0.005361047595046287, 'init_value': -10.217708587646484, 'ave_value': -11.529081318511519, 'soft_opc': nan} step=17200




2022-04-20 16:32.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163037/model_17200.pt
search iteration:  13
using hyper params:  [0.005794572673136579, 0.0020568559300911736, 5.00414863550367e-05, 1]
2022-04-20 16:32.14 [debug    ] RoundIterator is selected.
2022-04-20 16:32.14 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420163214
2022-04-20 16:32.14 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:32.14 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:32.14 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:32.15 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00579457267313

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.18 [info     ] TD3PlusBC_20220420163214: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003291828590526915, 'time_algorithm_update': 0.008427465868275068, 'critic_loss': 0.5896725964389349, 'actor_loss': 0.11090933788589567, 'time_step': 0.008837291371752645, 'td_error': 0.8120349809165367, 'init_value': -0.5296198725700378, 'ave_value': 0.14309552764197023} step=342
2022-04-20 16:32.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.22 [info     ] TD3PlusBC_20220420163214: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003263176533213833, 'time_algorithm_update': 0.008989306918361731, 'critic_loss': 0.20168373433121464, 'actor_loss': 0.021473843059693163, 'time_step': 0.009395816172772681, 'td_error': 0.8060601061097936, 'init_value': -0.7798115015029907, 'ave_value': 0.2151068067478922} step=684
2022-04-20 16:32.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.26 [info     ] TD3PlusBC_20220420163214: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00032333323830052425, 'time_algorithm_update': 0.00890181845391703, 'critic_loss': 0.22729417154489204, 'actor_loss': 0.01888860733798373, 'time_step': 0.00930488527866832, 'td_error': 0.8009616230539649, 'init_value': -1.04520583152771, 'ave_value': 0.26564125248048504} step=1026
2022-04-20 16:32.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.29 [info     ] TD3PlusBC_20220420163214: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003270900737472445, 'time_algorithm_update': 0.0083539478960093, 'critic_loss': 0.2714994545394217, 'actor_loss': 0.010310466395832642, 'time_step': 0.008759743288943642, 'td_error': 0.8008709219938205, 'init_value': -1.3245203495025635, 'ave_value': 0.33193936389220696} step=1368
2022-04-20 16:32.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.33 [info     ] TD3PlusBC_20220420163214: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003236155760915656, 'time_algorithm_update': 0.008823745431955795, 'critic_loss': 0.32017667728819343, 'actor_loss': 0.020907175035504568, 'time_step': 0.00922518864012601, 'td_error': 0.8041326235027254, 'init_value': -1.5903189182281494, 'ave_value': 0.39301763377399895} step=1710
2022-04-20 16:32.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.36 [info     ] TD3PlusBC_20220420163214: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003335287696436832, 'time_algorithm_update': 0.008558936286390874, 'critic_loss': 0.39314375157680426, 'actor_loss': 0.03259624471092782, 'time_step': 0.008975771435520105, 'td_error': 0.8128514482233739, 'init_value': -1.9233369827270508, 'ave_value': 0.4479727345945414} step=2052
2022-04-20 16:32.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.40 [info     ] TD3PlusBC_20220420163214: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032786597982484696, 'time_algorithm_update': 0.008835382628859136, 'critic_loss': 0.44580649763171437, 'actor_loss': 0.019615090163479076, 'time_step': 0.00924357127027902, 'td_error': 0.8179938907430849, 'init_value': -2.1868040561676025, 'ave_value': 0.5242570349695329} step=2394
2022-04-20 16:32.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.44 [info     ] TD3PlusBC_20220420163214: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003290434329830415, 'time_algorithm_update': 0.008847391396238092, 'critic_loss': 0.5064640256483652, 'actor_loss': 0.05177457783131571, 'time_step': 0.009257189014501739, 'td_error': 0.8278412321752464, 'init_value': -2.436729907989502, 'ave_value': 0.6310397726148871} step=2736
2022-04-20 16:32.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.47 [info     ] TD3PlusBC_20220420163214: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003260081274467602, 'time_algorithm_update': 0.008415789632072226, 'critic_loss': 0.5914238430995341, 'actor_loss': 0.04878042328950257, 'time_step': 0.008821284561826471, 'td_error': 0.8435876251100585, 'init_value': -2.8295581340789795, 'ave_value': 0.6381106800629615} step=3078
2022-04-20 16:32.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.51 [info     ] TD3PlusBC_20220420163214: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003262458488955135, 'time_algorithm_update': 0.008942340549669768, 'critic_loss': 0.6697115166145459, 'actor_loss': 0.06295321337138003, 'time_step': 0.0093490359378837, 'td_error': 0.8586460276213843, 'init_value': -3.105602979660034, 'ave_value': 0.6919998570179634} step=3420
2022-04-20 16:32.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.55 [info     ] TD3PlusBC_20220420163214: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003266662184955084, 'time_algorithm_update': 0.00857770652101751, 'critic_loss': 0.7510722419846128, 'actor_loss': 0.039743968182139926, 'time_step': 0.008983197267989667, 'td_error': 0.8785808061616907, 'init_value': -3.4541988372802734, 'ave_value': 0.7542962057308319} step=3762
2022-04-20 16:32.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.58 [info     ] TD3PlusBC_20220420163214: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00033022297753228084, 'time_algorithm_update': 0.008791845444350214, 'critic_loss': 0.8103281315742877, 'actor_loss': 0.05674146152814927, 'time_step': 0.009202076677690473, 'td_error': 0.9008333961653525, 'init_value': -3.7827773094177246, 'ave_value': 0.8103349556661471} step=4104
2022-04-20 16:32.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.02 [info     ] TD3PlusBC_20220420163214: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003323750189173291, 'time_algorithm_update': 0.00902731864773042, 'critic_loss': 0.9030991463284743, 'actor_loss': 0.049714727708470755, 'time_step': 0.0094398518054806, 'td_error': 0.9235133302294567, 'init_value': -4.0136895179748535, 'ave_value': 0.911213767688369} step=4446
2022-04-20 16:33.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.06 [info     ] TD3PlusBC_20220420163214: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003279844919840495, 'time_algorithm_update': 0.008491251900879264, 'critic_loss': 0.9784145180942022, 'actor_loss': 0.0641462912902846, 'time_step': 0.008901267720941911, 'td_error': 0.956182833814166, 'init_value': -4.412033557891846, 'ave_value': 0.8932526317365082} step=4788
2022-04-20 16:33.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.09 [info     ] TD3PlusBC_20220420163214: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003291222087123938, 'time_algorithm_update': 0.008935475907130548, 'critic_loss': 1.0914092069567993, 'actor_loss': 0.06223642392552387, 'time_step': 0.00934611217320314, 'td_error': 0.9796473119824883, 'init_value': -4.69140100479126, 'ave_value': 0.9888198199594679} step=5130
2022-04-20 16:33.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.13 [info     ] TD3PlusBC_20220420163214: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003278534314785784, 'time_algorithm_update': 0.008971367663110208, 'critic_loss': 1.1659087304744804, 'actor_loss': 0.06275451035178893, 'time_step': 0.009379609983566909, 'td_error': 1.0057167617587757, 'init_value': -4.995170593261719, 'ave_value': 1.061610670257989} step=5472
2022-04-20 16:33.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.17 [info     ] TD3PlusBC_20220420163214: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003271953404298303, 'time_algorithm_update': 0.008901191036603605, 'critic_loss': 1.2974642110846892, 'actor_loss': 0.07019405367604473, 'time_step': 0.009309712906329953, 'td_error': 1.043765683502086, 'init_value': -5.361309051513672, 'ave_value': 1.0903972018901091} step=5814
2022-04-20 16:33.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.20 [info     ] TD3PlusBC_20220420163214: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00032671292622884113, 'time_algorithm_update': 0.009040435155232748, 'critic_loss': 1.4194061499067216, 'actor_loss': 0.0667086482614453, 'time_step': 0.009447674305118316, 'td_error': 1.070155858209439, 'init_value': -5.669469356536865, 'ave_value': 1.1644678416624161} step=6156
2022-04-20 16:33.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.24 [info     ] TD3PlusBC_20220420163214: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003225726690905833, 'time_algorithm_update': 0.008479756918566965, 'critic_loss': 1.5313999097772508, 'actor_loss': 0.053550341368070124, 'time_step': 0.008883237838745117, 'td_error': 1.1054930151629132, 'init_value': -5.999753952026367, 'ave_value': 1.155624950858303} step=6498
2022-04-20 16:33.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.28 [info     ] TD3PlusBC_20220420163214: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003282542814288223, 'time_algorithm_update': 0.008948360967357256, 'critic_loss': 1.6695259211704745, 'actor_loss': 0.07383073319555723, 'time_step': 0.009358164162663688, 'td_error': 1.1271330196173475, 'init_value': -6.3516340255737305, 'ave_value': 1.23617080712649} step=6840
2022-04-20 16:33.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.31 [info     ] TD3PlusBC_20220420163214: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00032788410521390147, 'time_algorithm_update': 0.008973835504543014, 'critic_loss': 1.8474374170366086, 'actor_loss': 0.09350791732557336, 'time_step': 0.009384330950285258, 'td_error': 1.1790222585083323, 'init_value': -6.5790886878967285, 'ave_value': 1.3372256013217407} step=7182
2022-04-20 16:33.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.35 [info     ] TD3PlusBC_20220420163214: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003349474299023723, 'time_algorithm_update': 0.008564599773340058, 'critic_loss': 2.0178604905019726, 'actor_loss': 0.07281174129474233, 'time_step': 0.008978767004626536, 'td_error': 1.2066488248874798, 'init_value': -7.142897605895996, 'ave_value': 1.2554919222157097} step=7524
2022-04-20 16:33.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.39 [info     ] TD3PlusBC_20220420163214: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032394880439802915, 'time_algorithm_update': 0.008779331954599123, 'critic_loss': 2.199750429444146, 'actor_loss': 0.07688607998758729, 'time_step': 0.00918292232424195, 'td_error': 1.2447094805048564, 'init_value': -7.269570827484131, 'ave_value': 1.4077711768231884} step=7866
2022-04-20 16:33.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.42 [info     ] TD3PlusBC_20220420163214: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003296854900337799, 'time_algorithm_update': 0.008457678800437882, 'critic_loss': 2.3550813599810962, 'actor_loss': 0.06721833707732067, 'time_step': 0.00886807385940998, 'td_error': 1.271388987586536, 'init_value': -7.7260284423828125, 'ave_value': 1.4113104149951758} step=8208
2022-04-20 16:33.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.46 [info     ] TD3PlusBC_20220420163214: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003234726643701743, 'time_algorithm_update': 0.00882260353244536, 'critic_loss': 2.518042191253071, 'actor_loss': 0.0771818911732986, 'time_step': 0.009224032100878264, 'td_error': 1.3101738774387617, 'init_value': -8.046377182006836, 'ave_value': 1.4735977042920736} step=8550
2022-04-20 16:33.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.50 [info     ] TD3PlusBC_20220420163214: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032868580511438917, 'time_algorithm_update': 0.008911537845232333, 'critic_loss': 2.7573990102051296, 'actor_loss': 0.09264975766602315, 'time_step': 0.00931861038096467, 'td_error': 1.3702883463525142, 'init_value': -8.450689315795898, 'ave_value': 1.4266081450370283} step=8892
2022-04-20 16:33.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.53 [info     ] TD3PlusBC_20220420163214: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003264598679124263, 'time_algorithm_update': 0.008607798152499728, 'critic_loss': 2.9351138017679514, 'actor_loss': 0.08182046926248143, 'time_step': 0.009012648236681844, 'td_error': 1.3881374430101647, 'init_value': -8.743928909301758, 'ave_value': 1.5262849631669788} step=9234
2022-04-20 16:33.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.57 [info     ] TD3PlusBC_20220420163214: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003325716096755357, 'time_algorithm_update': 0.008953744207906443, 'critic_loss': 3.1233874134152955, 'actor_loss': 0.07034600375775706, 'time_step': 0.009370181295606825, 'td_error': 1.44157537933307, 'init_value': -8.984651565551758, 'ave_value': 1.5986129045545183} step=9576
2022-04-20 16:33.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.01 [info     ] TD3PlusBC_20220420163214: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00032662717919600636, 'time_algorithm_update': 0.008506919208325837, 'critic_loss': 3.368152184008855, 'actor_loss': 0.08314314122959884, 'time_step': 0.00891310917703729, 'td_error': 1.485886024336749, 'init_value': -9.382238388061523, 'ave_value': 1.6244978164074741} step=9918
2022-04-20 16:34.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.04 [info     ] TD3PlusBC_20220420163214: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003245176627622013, 'time_algorithm_update': 0.008816915645933988, 'critic_loss': 3.5238093402650623, 'actor_loss': 0.06448850715369508, 'time_step': 0.009217497200993767, 'td_error': 1.522247327267186, 'init_value': -9.927003860473633, 'ave_value': 1.5863845782014667} step=10260
2022-04-20 16:34.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.08 [info     ] TD3PlusBC_20220420163214: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003286021494725991, 'time_algorithm_update': 0.009033488948442782, 'critic_loss': 3.7487972936435052, 'actor_loss': 0.0872603608023005, 'time_step': 0.00944135412138108, 'td_error': 1.5693345651968265, 'init_value': -10.080079078674316, 'ave_value': 1.7105590396613588} step=10602
2022-04-20 16:34.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.12 [info     ] TD3PlusBC_20220420163214: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.000322226195307503, 'time_algorithm_update': 0.008451439483821044, 'critic_loss': 3.9598636627197266, 'actor_loss': 0.06353621012721843, 'time_step': 0.008852831801475837, 'td_error': 1.5892883423698172, 'init_value': -10.51792049407959, 'ave_value': 1.7025621294632538} step=10944
2022-04-20 16:34.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.15 [info     ] TD3PlusBC_20220420163214: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003292449036536858, 'time_algorithm_update': 0.008897344968472307, 'critic_loss': 4.252783524536947, 'actor_loss': 0.07929087757018574, 'time_step': 0.009309365735416525, 'td_error': 1.6086130811009247, 'init_value': -11.037565231323242, 'ave_value': 1.770996163946803} step=11286
2022-04-20 16:34.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.19 [info     ] TD3PlusBC_20220420163214: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003288593905711035, 'time_algorithm_update': 0.008961838588379976, 'critic_loss': 4.474261076652516, 'actor_loss': 0.10214171987789417, 'time_step': 0.009369752560442651, 'td_error': 1.6653437382412026, 'init_value': -11.391672134399414, 'ave_value': 1.7572109644818077} step=11628
2022-04-20 16:34.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.23 [info     ] TD3PlusBC_20220420163214: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003302975704795436, 'time_algorithm_update': 0.008814783124198691, 'critic_loss': 4.703555253862637, 'actor_loss': 0.0819023633212374, 'time_step': 0.009225768652575754, 'td_error': 1.7314312687496922, 'init_value': -11.558673858642578, 'ave_value': 1.8530708160003027} step=11970
2022-04-20 16:34.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.26 [info     ] TD3PlusBC_20220420163214: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00032804932510643674, 'time_algorithm_update': 0.008895052106756913, 'critic_loss': 4.940235021344402, 'actor_loss': 0.10814409439413868, 'time_step': 0.009301711941323084, 'td_error': 1.79200406497241, 'init_value': -12.071439743041992, 'ave_value': 1.7949043414657853} step=12312
2022-04-20 16:34.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.30 [info     ] TD3PlusBC_20220420163214: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00032875900380095546, 'time_algorithm_update': 0.008465572407371119, 'critic_loss': 5.158416144331993, 'actor_loss': 0.08430252781911203, 'time_step': 0.00887468265511139, 'td_error': 1.813622295864945, 'init_value': -12.398473739624023, 'ave_value': 1.8919478610398768} step=12654
2022-04-20 16:34.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.34 [info     ] TD3PlusBC_20220420163214: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00032549503951044805, 'time_algorithm_update': 0.008833333065635279, 'critic_loss': 5.456088357152995, 'actor_loss': 0.09153414373857933, 'time_step': 0.009238968815719872, 'td_error': 1.8918123574260417, 'init_value': -12.424413681030273, 'ave_value': 2.0209986929957933} step=12996
2022-04-20 16:34.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.37 [info     ] TD3PlusBC_20220420163214: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00032931182816711783, 'time_algorithm_update': 0.008877174198976037, 'critic_loss': 5.671743706304428, 'actor_loss': 0.10052420693444229, 'time_step': 0.009289664134644625, 'td_error': 1.9059360337661508, 'init_value': -13.049240112304688, 'ave_value': 2.0204135527678235} step=13338
2022-04-20 16:34.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.41 [info     ] TD3PlusBC_20220420163214: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032732152102286357, 'time_algorithm_update': 0.008377114234612001, 'critic_loss': 5.951795125216768, 'actor_loss': 0.08524755619422734, 'time_step': 0.008781391277647856, 'td_error': 1.9685861877028048, 'init_value': -13.544351577758789, 'ave_value': 1.899525235201828} step=13680
2022-04-20 16:34.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.45 [info     ] TD3PlusBC_20220420163214: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003281720200477288, 'time_algorithm_update': 0.008985268442254318, 'critic_loss': 6.267331386344475, 'actor_loss': 0.10038672287387458, 'time_step': 0.009391728200410543, 'td_error': 2.0121360046777905, 'init_value': -13.773210525512695, 'ave_value': 1.9786945280368935} step=14022
2022-04-20 16:34.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.48 [info     ] TD3PlusBC_20220420163214: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003272789960716203, 'time_algorithm_update': 0.008491108292027524, 'critic_loss': 6.4079768162721775, 'actor_loss': 0.08595819165657835, 'time_step': 0.008899721485829492, 'td_error': 2.0637216425334963, 'init_value': -14.311203002929688, 'ave_value': 2.0413288686649356} step=14364
2022-04-20 16:34.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.52 [info     ] TD3PlusBC_20220420163214: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032727132763778953, 'time_algorithm_update': 0.00895019232878211, 'critic_loss': 6.796617821294662, 'actor_loss': 0.08952344115278875, 'time_step': 0.009357257196080614, 'td_error': 2.1281878713165194, 'init_value': -14.73541259765625, 'ave_value': 1.940226401828758} step=14706
2022-04-20 16:34.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.56 [info     ] TD3PlusBC_20220420163214: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00033109857324968307, 'time_algorithm_update': 0.008933183045415153, 'critic_loss': 7.025396655177512, 'actor_loss': 0.0973024894837399, 'time_step': 0.009343018308717605, 'td_error': 2.166851906325704, 'init_value': -15.004241943359375, 'ave_value': 2.1390851294624342} step=15048
2022-04-20 16:34.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.59 [info     ] TD3PlusBC_20220420163214: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032941500345865887, 'time_algorithm_update': 0.008430526967634234, 'critic_loss': 7.257108017017967, 'actor_loss': 0.10411602977597922, 'time_step': 0.008843099164683916, 'td_error': 2.2206360429095415, 'init_value': -15.596890449523926, 'ave_value': 2.036543750251803} step=15390
2022-04-20 16:34.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.03 [info     ] TD3PlusBC_20220420163214: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003313662713034111, 'time_algorithm_update': 0.008916273451687997, 'critic_loss': 7.572818287980486, 'actor_loss': 0.08441342517994997, 'time_step': 0.009325672311392443, 'td_error': 2.3022606721636407, 'init_value': -15.714731216430664, 'ave_value': 2.0995645947646566} step=15732
2022-04-20 16:35.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.06 [info     ] TD3PlusBC_20220420163214: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003276554464596754, 'time_algorithm_update': 0.008686974034671896, 'critic_loss': 7.828801972649948, 'actor_loss': 0.11350431615671916, 'time_step': 0.00909346934647588, 'td_error': 2.349668906407434, 'init_value': -16.30811309814453, 'ave_value': 2.205428742256754} step=16074
2022-04-20 16:35.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.10 [info     ] TD3PlusBC_20220420163214: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033105325977704676, 'time_algorithm_update': 0.009005404355233176, 'critic_loss': 8.032324315511692, 'actor_loss': 0.11420912254187796, 'time_step': 0.009419559038173386, 'td_error': 2.3990169984475114, 'init_value': -16.744131088256836, 'ave_value': 2.0849513418459846} step=16416
2022-04-20 16:35.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.14 [info     ] TD3PlusBC_20220420163214: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003297754198487042, 'time_algorithm_update': 0.008891159330892283, 'critic_loss': 8.347478092413896, 'actor_loss': 0.09213783408988986, 'time_step': 0.009303615107173807, 'td_error': 2.4829129326369905, 'init_value': -16.595191955566406, 'ave_value': 2.2635835719598574} step=16758
2022-04-20 16:35.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.17 [info     ] TD3PlusBC_20220420163214: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003285937839084201, 'time_algorithm_update': 0.008348670619273047, 'critic_loss': 8.571489898782028, 'actor_loss': 0.10771406984381508, 'time_step': 0.008756467473437215, 'td_error': 2.5314180799232453, 'init_value': -17.74147605895996, 'ave_value': 2.160982673180432} step=17100
2022-04-20 16:35.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163214/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:35.19 [info     ] FQE_20220420163518: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016229704948468398, 'time_algorithm_update': 0.005053980875823458, 'loss': 0.007128114047812792, 'time_step': 0.005292167771334028, 'init_value': -0.45475509762763977, 'ave_value': -0.4638693628010449, 'soft_opc': nan} step=177




2022-04-20 16:35.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.20 [info     ] FQE_20220420163518: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001626957607808086, 'time_algorithm_update': 0.005027214686075847, 'loss': 0.005796077529673522, 'time_step': 0.005263189811491023, 'init_value': -0.5894134044647217, 'ave_value': -0.5611049286596052, 'soft_opc': nan} step=354




2022-04-20 16:35.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.21 [info     ] FQE_20220420163518: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00016338811755853858, 'time_algorithm_update': 0.005054856424277785, 'loss': 0.005643726836911029, 'time_step': 0.005290066455043642, 'init_value': -0.6777052283287048, 'ave_value': -0.6002828173809224, 'soft_opc': nan} step=531




2022-04-20 16:35.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.22 [info     ] FQE_20220420163518: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.0001615858347402454, 'time_algorithm_update': 0.005069109006116619, 'loss': 0.005405512225345872, 'time_step': 0.005305610807602015, 'init_value': -0.7921759486198425, 'ave_value': -0.6764426372609696, 'soft_opc': nan} step=708




2022-04-20 16:35.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.23 [info     ] FQE_20220420163518: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016341236351573535, 'time_algorithm_update': 0.005123683961771302, 'loss': 0.005224109856190823, 'time_step': 0.0053609333469369315, 'init_value': -0.8505508303642273, 'ave_value': -0.6939308631975013, 'soft_opc': nan} step=885




2022-04-20 16:35.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.24 [info     ] FQE_20220420163518: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016773218488962637, 'time_algorithm_update': 0.004972181751229669, 'loss': 0.004924287242120353, 'time_step': 0.0052119500219485184, 'init_value': -0.9254865646362305, 'ave_value': -0.7430940296019878, 'soft_opc': nan} step=1062




2022-04-20 16:35.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.25 [info     ] FQE_20220420163518: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00015842712531655522, 'time_algorithm_update': 0.004164439810197906, 'loss': 0.004626232312497615, 'time_step': 0.004393308176159186, 'init_value': -0.9750709533691406, 'ave_value': -0.7605932747548049, 'soft_opc': nan} step=1239




2022-04-20 16:35.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.26 [info     ] FQE_20220420163518: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.0001675597691940049, 'time_algorithm_update': 0.005131561203865008, 'loss': 0.004464427282833783, 'time_step': 0.0053743979351668705, 'init_value': -1.0730429887771606, 'ave_value': -0.8266223720065108, 'soft_opc': nan} step=1416




2022-04-20 16:35.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.27 [info     ] FQE_20220420163518: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016538032704153976, 'time_algorithm_update': 0.005010808255039366, 'loss': 0.004523843519605359, 'time_step': 0.005252153859973627, 'init_value': -1.144698977470398, 'ave_value': -0.8842455577563954, 'soft_opc': nan} step=1593




2022-04-20 16:35.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.28 [info     ] FQE_20220420163518: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001674021704722259, 'time_algorithm_update': 0.005088693004543498, 'loss': 0.004582718876322027, 'time_step': 0.005330918198924953, 'init_value': -1.226906657218933, 'ave_value': -0.9329884935338217, 'soft_opc': nan} step=1770




2022-04-20 16:35.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.29 [info     ] FQE_20220420163518: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00016316047496041337, 'time_algorithm_update': 0.00496731639581885, 'loss': 0.004588064511364462, 'time_step': 0.005203901711156813, 'init_value': -1.2762212753295898, 'ave_value': -0.9535928585686841, 'soft_opc': nan} step=1947




2022-04-20 16:35.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.30 [info     ] FQE_20220420163518: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.0001632183958581612, 'time_algorithm_update': 0.0050583397601283876, 'loss': 0.004657442383547076, 'time_step': 0.005296138720323811, 'init_value': -1.3533477783203125, 'ave_value': -0.9931066127331766, 'soft_opc': nan} step=2124




2022-04-20 16:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.31 [info     ] FQE_20220420163518: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016897681069239385, 'time_algorithm_update': 0.005023785230130125, 'loss': 0.004765173543960193, 'time_step': 0.005267904303168173, 'init_value': -1.409243106842041, 'ave_value': -1.0231924949913054, 'soft_opc': nan} step=2301




2022-04-20 16:35.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.32 [info     ] FQE_20220420163518: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016361710715428584, 'time_algorithm_update': 0.0050967655612924, 'loss': 0.004943135070172533, 'time_step': 0.005335132954484325, 'init_value': -1.503409504890442, 'ave_value': -1.1016656432244871, 'soft_opc': nan} step=2478




2022-04-20 16:35.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.33 [info     ] FQE_20220420163518: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.000164725686197227, 'time_algorithm_update': 0.004908744898219567, 'loss': 0.0050646900745256055, 'time_step': 0.005147022042570814, 'init_value': -1.588016152381897, 'ave_value': -1.1462729064820407, 'soft_opc': nan} step=2655




2022-04-20 16:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.33 [info     ] FQE_20220420163518: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.0001625785719876909, 'time_algorithm_update': 0.004265262582207803, 'loss': 0.005412866558095619, 'time_step': 0.004502273548794331, 'init_value': -1.6410197019577026, 'ave_value': -1.1789329246268259, 'soft_opc': nan} step=2832




2022-04-20 16:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.34 [info     ] FQE_20220420163518: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00016430542293914966, 'time_algorithm_update': 0.00492138108291195, 'loss': 0.00569454493661448, 'time_step': 0.005160862443137303, 'init_value': -1.7204760313034058, 'ave_value': -1.2129774608859072, 'soft_opc': nan} step=3009




2022-04-20 16:35.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.35 [info     ] FQE_20220420163518: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016560931663728702, 'time_algorithm_update': 0.0050943611705370545, 'loss': 0.005918273349729298, 'time_step': 0.0053386055143539515, 'init_value': -1.7392398118972778, 'ave_value': -1.2006989324415052, 'soft_opc': nan} step=3186




2022-04-20 16:35.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.37 [info     ] FQE_20220420163518: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001643094639320158, 'time_algorithm_update': 0.0050394737114340575, 'loss': 0.006380677463928774, 'time_step': 0.005281036182985467, 'init_value': -1.86729097366333, 'ave_value': -1.3086673607518364, 'soft_opc': nan} step=3363




2022-04-20 16:35.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.38 [info     ] FQE_20220420163518: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00016620738358147401, 'time_algorithm_update': 0.005084932187182755, 'loss': 0.006991517778070338, 'time_step': 0.005324898466552045, 'init_value': -1.9767104387283325, 'ave_value': -1.3844724763621081, 'soft_opc': nan} step=3540




2022-04-20 16:35.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.39 [info     ] FQE_20220420163518: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00017038307620980645, 'time_algorithm_update': 0.005005363690651069, 'loss': 0.007426400622879129, 'time_step': 0.005252575470229327, 'init_value': -2.0978763103485107, 'ave_value': -1.476960537088168, 'soft_opc': nan} step=3717




2022-04-20 16:35.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.40 [info     ] FQE_20220420163518: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016245330120884092, 'time_algorithm_update': 0.004997712744157867, 'loss': 0.007567214243519146, 'time_step': 0.005233143682533739, 'init_value': -2.136436700820923, 'ave_value': -1.4814994935516839, 'soft_opc': nan} step=3894




2022-04-20 16:35.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.41 [info     ] FQE_20220420163518: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016443069371799965, 'time_algorithm_update': 0.005096582369615803, 'loss': 0.008091468154847369, 'time_step': 0.0053354575809112376, 'init_value': -2.1426403522491455, 'ave_value': -1.4789584052276326, 'soft_opc': nan} step=4071




2022-04-20 16:35.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.42 [info     ] FQE_20220420163518: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001680554643189166, 'time_algorithm_update': 0.005143170976369394, 'loss': 0.008206615070575627, 'time_step': 0.0053870246908759, 'init_value': -2.32568097114563, 'ave_value': -1.63014565555899, 'soft_opc': nan} step=4248




2022-04-20 16:35.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.42 [info     ] FQE_20220420163518: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016212867478192862, 'time_algorithm_update': 0.0042242141766736735, 'loss': 0.008677665674322321, 'time_step': 0.004456188719151384, 'init_value': -2.3591225147247314, 'ave_value': -1.620511052737365, 'soft_opc': nan} step=4425




2022-04-20 16:35.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.43 [info     ] FQE_20220420163518: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.000163370606589452, 'time_algorithm_update': 0.00506812704485015, 'loss': 0.009225735434930835, 'time_step': 0.00530335189258985, 'init_value': -2.410515069961548, 'ave_value': -1.6684220283597082, 'soft_opc': nan} step=4602




2022-04-20 16:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.44 [info     ] FQE_20220420163518: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00016678659255895238, 'time_algorithm_update': 0.005012036716870669, 'loss': 0.00936922415009684, 'time_step': 0.005250121240561964, 'init_value': -2.5231997966766357, 'ave_value': -1.740462910950005, 'soft_opc': nan} step=4779




2022-04-20 16:35.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.45 [info     ] FQE_20220420163518: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016885692790403204, 'time_algorithm_update': 0.005065620282275528, 'loss': 0.0099292154967164, 'time_step': 0.005307095198981506, 'init_value': -2.6237118244171143, 'ave_value': -1.8314249598407173, 'soft_opc': nan} step=4956




2022-04-20 16:35.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.46 [info     ] FQE_20220420163518: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016406161636955995, 'time_algorithm_update': 0.0051440357488427456, 'loss': 0.01072238361078846, 'time_step': 0.0053844034335034045, 'init_value': -2.7779648303985596, 'ave_value': -1.902707238920458, 'soft_opc': nan} step=5133




2022-04-20 16:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.47 [info     ] FQE_20220420163518: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016273078271898173, 'time_algorithm_update': 0.004977778526349256, 'loss': 0.010736438610833033, 'time_step': 0.00521573643226408, 'init_value': -2.8772029876708984, 'ave_value': -2.0173171133787426, 'soft_opc': nan} step=5310




2022-04-20 16:35.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.48 [info     ] FQE_20220420163518: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016266882082836777, 'time_algorithm_update': 0.005075671578531211, 'loss': 0.011337014280212254, 'time_step': 0.005310470775022345, 'init_value': -2.954063653945923, 'ave_value': -2.0420349465207654, 'soft_opc': nan} step=5487




2022-04-20 16:35.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.50 [info     ] FQE_20220420163518: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016532240614379194, 'time_algorithm_update': 0.005147893550032276, 'loss': 0.012172741686535926, 'time_step': 0.00539007159949696, 'init_value': -3.1270782947540283, 'ave_value': -2.1525066066969623, 'soft_opc': nan} step=5664




2022-04-20 16:35.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.51 [info     ] FQE_20220420163518: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016429599395579537, 'time_algorithm_update': 0.005128465803329554, 'loss': 0.012749561428955341, 'time_step': 0.0053670999020506436, 'init_value': -3.188990354537964, 'ave_value': -2.1940787021641257, 'soft_opc': nan} step=5841




2022-04-20 16:35.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.51 [info     ] FQE_20220420163518: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015920973093496205, 'time_algorithm_update': 0.004142528199880136, 'loss': 0.013544752069822603, 'time_step': 0.004374587603208036, 'init_value': -3.3796546459198, 'ave_value': -2.3139887633266394, 'soft_opc': nan} step=6018




2022-04-20 16:35.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.52 [info     ] FQE_20220420163518: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016250852811134468, 'time_algorithm_update': 0.005075477610873637, 'loss': 0.014091854181672093, 'time_step': 0.0053108627513303594, 'init_value': -3.453930139541626, 'ave_value': -2.3652777773302955, 'soft_opc': nan} step=6195




2022-04-20 16:35.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.53 [info     ] FQE_20220420163518: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.000164095291310111, 'time_algorithm_update': 0.005107615627137955, 'loss': 0.015420517650706494, 'time_step': 0.005348613706685729, 'init_value': -3.581982374191284, 'ave_value': -2.4391682012661082, 'soft_opc': nan} step=6372




2022-04-20 16:35.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.54 [info     ] FQE_20220420163518: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016888790884933902, 'time_algorithm_update': 0.0050717598974368, 'loss': 0.016188247466070504, 'time_step': 0.005315984036289366, 'init_value': -3.645418167114258, 'ave_value': -2.4558721353878847, 'soft_opc': nan} step=6549




2022-04-20 16:35.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.55 [info     ] FQE_20220420163518: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016670038471114165, 'time_algorithm_update': 0.005062196214320296, 'loss': 0.01685261668763676, 'time_step': 0.005303766767857439, 'init_value': -3.8245556354522705, 'ave_value': -2.5866432532354877, 'soft_opc': nan} step=6726




2022-04-20 16:35.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.56 [info     ] FQE_20220420163518: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016258261298055703, 'time_algorithm_update': 0.0050416221726412155, 'loss': 0.017260742704594496, 'time_step': 0.005279790210185078, 'init_value': -3.8859474658966064, 'ave_value': -2.626513373574337, 'soft_opc': nan} step=6903




2022-04-20 16:35.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.57 [info     ] FQE_20220420163518: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016703174612616416, 'time_algorithm_update': 0.004936743590791347, 'loss': 0.018120998958662406, 'time_step': 0.0051783249203094656, 'init_value': -3.972722291946411, 'ave_value': -2.696437721343728, 'soft_opc': nan} step=7080




2022-04-20 16:35.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:35.58 [info     ] FQE_20220420163518: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016578712032339668, 'time_algorithm_update': 0.004999550048914333, 'loss': 0.018568213900819548, 'time_step': 0.005246221682446151, 'init_value': -4.069239139556885, 'ave_value': -2.7141696603627534, 'soft_opc': nan} step=7257




2022-04-20 16:35.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.00 [info     ] FQE_20220420163518: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.0001671368119406835, 'time_algorithm_update': 0.005180252473906609, 'loss': 0.0188905792363451, 'time_step': 0.005423700742128878, 'init_value': -4.19399881362915, 'ave_value': -2.7911394154166316, 'soft_opc': nan} step=7434




2022-04-20 16:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.00 [info     ] FQE_20220420163518: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016439567177982652, 'time_algorithm_update': 0.004162766839151328, 'loss': 0.020375603754704787, 'time_step': 0.004403704303806111, 'init_value': -4.301497459411621, 'ave_value': -2.899525691107945, 'soft_opc': nan} step=7611




2022-04-20 16:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.01 [info     ] FQE_20220420163518: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00017226348489017809, 'time_algorithm_update': 0.005127050108828787, 'loss': 0.020438487673083604, 'time_step': 0.005373933220987266, 'init_value': -4.391417980194092, 'ave_value': -2.9635272557671004, 'soft_opc': nan} step=7788




2022-04-20 16:36.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.02 [info     ] FQE_20220420163518: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.0001661481156861041, 'time_algorithm_update': 0.005124680740011614, 'loss': 0.02183436023171381, 'time_step': 0.005364416682787534, 'init_value': -4.576478958129883, 'ave_value': -3.088899744451941, 'soft_opc': nan} step=7965




2022-04-20 16:36.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.03 [info     ] FQE_20220420163518: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016913036842130673, 'time_algorithm_update': 0.005128875290606655, 'loss': 0.02188867078844121, 'time_step': 0.005374803381451106, 'init_value': -4.673155307769775, 'ave_value': -3.1497903427204212, 'soft_opc': nan} step=8142




2022-04-20 16:36.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.04 [info     ] FQE_20220420163518: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00017078044050830904, 'time_algorithm_update': 0.005038311252486234, 'loss': 0.02211791912809351, 'time_step': 0.0052841369715114096, 'init_value': -4.723025798797607, 'ave_value': -3.1511920327717835, 'soft_opc': nan} step=8319




2022-04-20 16:36.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.05 [info     ] FQE_20220420163518: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.000166238364526781, 'time_algorithm_update': 0.005151904908950719, 'loss': 0.022644128332751156, 'time_step': 0.005393437746554445, 'init_value': -4.7733049392700195, 'ave_value': -3.161566103471292, 'soft_opc': nan} step=8496




2022-04-20 16:36.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.06 [info     ] FQE_20220420163518: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00017325487514000155, 'time_algorithm_update': 0.0051186879475911455, 'loss': 0.024333411573805967, 'time_step': 0.005367293869708217, 'init_value': -4.839611530303955, 'ave_value': -3.2115328793232147, 'soft_opc': nan} step=8673




2022-04-20 16:36.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:36.08 [info     ] FQE_20220420163518: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.0001715105132194562, 'time_algorithm_update': 0.005109691350473522, 'loss': 0.025134496632200472, 'time_step': 0.0053567414903371345, 'init_value': -4.861858367919922, 'ave_value': -3.201112996243142, 'soft_opc': nan} step=8850




2022-04-20 16:36.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163518/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:36.08 [info     ] Directory is created at d3rlpy_logs/FQE_20220420163608
2022-04-20 16:36.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:36.08 [debug    ] Building models...
2022-04-20 16:36.08 [debug    ] Models have been built.
2022-04-20 16:36.08 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420163608/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:36.10 [info     ] FQE_20220420163608: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016972076061160066, 'time_algorithm_update': 0.004643820052923158, 'loss': 0.02798219020261841, 'time_step': 0.004889831293460934, 'init_value': -0.9689867496490479, 'ave_value': -0.9370307295641921, 'soft_opc': nan} step=344




2022-04-20 16:36.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.12 [info     ] FQE_20220420163608: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017275505287702694, 'time_algorithm_update': 0.005039111819378165, 'loss': 0.021846462777534197, 'time_step': 0.005285960297251857, 'init_value': -1.393388032913208, 'ave_value': -1.3089820651136137, 'soft_opc': nan} step=688




2022-04-20 16:36.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.14 [info     ] FQE_20220420163608: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017107434050981388, 'time_algorithm_update': 0.005152771639269452, 'loss': 0.025661425084298, 'time_step': 0.005399880714194719, 'init_value': -2.0814390182495117, 'ave_value': -1.9032586270978942, 'soft_opc': nan} step=1032




2022-04-20 16:36.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.16 [info     ] FQE_20220420163608: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017308634380961574, 'time_algorithm_update': 0.005087308412374452, 'loss': 0.03238705635005825, 'time_step': 0.005337065042451371, 'init_value': -2.4735374450683594, 'ave_value': -2.179136315396324, 'soft_opc': nan} step=1376




2022-04-20 16:36.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.17 [info     ] FQE_20220420163608: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016643418822177622, 'time_algorithm_update': 0.004753302696139314, 'loss': 0.04465958774414693, 'time_step': 0.004998878684154776, 'init_value': -3.1281023025512695, 'ave_value': -2.6507937068733187, 'soft_opc': nan} step=1720




2022-04-20 16:36.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.19 [info     ] FQE_20220420163608: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001695100651230923, 'time_algorithm_update': 0.00501228834307471, 'loss': 0.0555156747171612, 'time_step': 0.0052566743174264595, 'init_value': -3.7565698623657227, 'ave_value': -3.125559798484618, 'soft_opc': nan} step=2064




2022-04-20 16:36.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.21 [info     ] FQE_20220420163608: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016957382823145665, 'time_algorithm_update': 0.0050381352735120195, 'loss': 0.07143255797767102, 'time_step': 0.005285994258037833, 'init_value': -4.387904167175293, 'ave_value': -3.551009099557786, 'soft_opc': nan} step=2408




2022-04-20 16:36.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.23 [info     ] FQE_20220420163608: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001728091129036837, 'time_algorithm_update': 0.005132649527039639, 'loss': 0.08815082237341029, 'time_step': 0.005380042763643487, 'init_value': -4.82936954498291, 'ave_value': -3.791533167080467, 'soft_opc': nan} step=2752




2022-04-20 16:36.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.25 [info     ] FQE_20220420163608: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017053512639777604, 'time_algorithm_update': 0.00507097604662873, 'loss': 0.1044827455676399, 'time_step': 0.0053199786086415134, 'init_value': -5.242002487182617, 'ave_value': -4.058197567053244, 'soft_opc': nan} step=3096




2022-04-20 16:36.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.27 [info     ] FQE_20220420163608: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017072295033654502, 'time_algorithm_update': 0.004618347384208857, 'loss': 0.1273470021632695, 'time_step': 0.0048583385556243185, 'init_value': -5.960742950439453, 'ave_value': -4.673880541623939, 'soft_opc': nan} step=3440




2022-04-20 16:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.29 [info     ] FQE_20220420163608: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016982680143311967, 'time_algorithm_update': 0.005052704450696013, 'loss': 0.14771056806582006, 'time_step': 0.005299750455590182, 'init_value': -6.174318313598633, 'ave_value': -4.82316705817493, 'soft_opc': nan} step=3784




2022-04-20 16:36.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.31 [info     ] FQE_20220420163608: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017299901607424715, 'time_algorithm_update': 0.005099183598230052, 'loss': 0.16665291391464687, 'time_step': 0.005347822987756064, 'init_value': -6.6428093910217285, 'ave_value': -5.257695559996311, 'soft_opc': nan} step=4128




2022-04-20 16:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.33 [info     ] FQE_20220420163608: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016858827236086824, 'time_algorithm_update': 0.005089565071948739, 'loss': 0.1877167798026443, 'time_step': 0.005335607500963433, 'init_value': -7.113715648651123, 'ave_value': -5.773857117551148, 'soft_opc': nan} step=4472




2022-04-20 16:36.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.35 [info     ] FQE_20220420163608: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017394575961800508, 'time_algorithm_update': 0.00488558896752291, 'loss': 0.20858235739518044, 'time_step': 0.005135145991347557, 'init_value': -7.4346771240234375, 'ave_value': -6.233827241940365, 'soft_opc': nan} step=4816




2022-04-20 16:36.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.37 [info     ] FQE_20220420163608: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016913303109102471, 'time_algorithm_update': 0.004854040783505107, 'loss': 0.22547311333094744, 'time_step': 0.005096938720969267, 'init_value': -7.527358055114746, 'ave_value': -6.452237846566624, 'soft_opc': nan} step=5160




2022-04-20 16:36.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.39 [info     ] FQE_20220420163608: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017177088316096815, 'time_algorithm_update': 0.005094684833704039, 'loss': 0.23957562729781276, 'time_step': 0.0053444636422534325, 'init_value': -7.860609531402588, 'ave_value': -6.9501004574796275, 'soft_opc': nan} step=5504




2022-04-20 16:36.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.41 [info     ] FQE_20220420163608: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001704720563666765, 'time_algorithm_update': 0.005091807869977729, 'loss': 0.2610009260161585, 'time_step': 0.005339808935342833, 'init_value': -7.820644378662109, 'ave_value': -7.074372259736573, 'soft_opc': nan} step=5848




2022-04-20 16:36.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.43 [info     ] FQE_20220420163608: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017154493997263354, 'time_algorithm_update': 0.0050498725369919175, 'loss': 0.2713145274170783, 'time_step': 0.005297517360642899, 'init_value': -7.877854824066162, 'ave_value': -7.193244596595481, 'soft_opc': nan} step=6192




2022-04-20 16:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.45 [info     ] FQE_20220420163608: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001661507196204607, 'time_algorithm_update': 0.004569472961647566, 'loss': 0.2845076408107267, 'time_step': 0.004810743553693904, 'init_value': -8.079304695129395, 'ave_value': -7.572803944869906, 'soft_opc': nan} step=6536




2022-04-20 16:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.47 [info     ] FQE_20220420163608: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017272525055463925, 'time_algorithm_update': 0.0050930152105730635, 'loss': 0.2962225776566409, 'time_step': 0.005344381859136182, 'init_value': -8.17198371887207, 'ave_value': -7.662587752974349, 'soft_opc': nan} step=6880




2022-04-20 16:36.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.49 [info     ] FQE_20220420163608: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017038472863130792, 'time_algorithm_update': 0.0050563479578772256, 'loss': 0.3009229714895577, 'time_step': 0.005302793064782786, 'init_value': -8.450275421142578, 'ave_value': -8.013848617540773, 'soft_opc': nan} step=7224




2022-04-20 16:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.51 [info     ] FQE_20220420163608: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001686658970145292, 'time_algorithm_update': 0.005077599786048712, 'loss': 0.30951388038391636, 'time_step': 0.005324099646058194, 'init_value': -8.796159744262695, 'ave_value': -8.321343739941792, 'soft_opc': nan} step=7568




2022-04-20 16:36.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.53 [info     ] FQE_20220420163608: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017217148182003996, 'time_algorithm_update': 0.005102817402329556, 'loss': 0.32071731309947926, 'time_step': 0.005354656729587289, 'init_value': -8.896228790283203, 'ave_value': -8.400002765646079, 'soft_opc': nan} step=7912




2022-04-20 16:36.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.55 [info     ] FQE_20220420163608: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001693125381026157, 'time_algorithm_update': 0.0047707425993542335, 'loss': 0.3223054424296458, 'time_step': 0.005015608183173246, 'init_value': -9.607644081115723, 'ave_value': -8.967042123767136, 'soft_opc': nan} step=8256




2022-04-20 16:36.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.57 [info     ] FQE_20220420163608: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017161840616270553, 'time_algorithm_update': 0.0051133320775142935, 'loss': 0.3227706885041106, 'time_step': 0.005363067222196002, 'init_value': -9.759566307067871, 'ave_value': -8.982497708105088, 'soft_opc': nan} step=8600




2022-04-20 16:36.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:36.59 [info     ] FQE_20220420163608: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017091978427975676, 'time_algorithm_update': 0.005158812500709711, 'loss': 0.33394726600801183, 'time_step': 0.005407409612522568, 'init_value': -10.230616569519043, 'ave_value': -9.314838289249655, 'soft_opc': nan} step=8944




2022-04-20 16:36.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.01 [info     ] FQE_20220420163608: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016909006030060524, 'time_algorithm_update': 0.005128504924995955, 'loss': 0.3444046232481162, 'time_step': 0.005374742108722066, 'init_value': -10.719181060791016, 'ave_value': -9.698995881262109, 'soft_opc': nan} step=9288




2022-04-20 16:37.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.02 [info     ] FQE_20220420163608: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001654673454373382, 'time_algorithm_update': 0.004574623911879783, 'loss': 0.34919367006135194, 'time_step': 0.004815571529920711, 'init_value': -11.200586318969727, 'ave_value': -9.987948140603079, 'soft_opc': nan} step=9632




2022-04-20 16:37.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.04 [info     ] FQE_20220420163608: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016990719839583997, 'time_algorithm_update': 0.005078784255094306, 'loss': 0.3517968022362004, 'time_step': 0.0053246340086293776, 'init_value': -11.527933120727539, 'ave_value': -10.132774210893256, 'soft_opc': nan} step=9976




2022-04-20 16:37.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.06 [info     ] FQE_20220420163608: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017007215078486952, 'time_algorithm_update': 0.005028172980907352, 'loss': 0.36060489931815237, 'time_step': 0.005275223144265108, 'init_value': -12.330306053161621, 'ave_value': -10.666143234491113, 'soft_opc': nan} step=10320




2022-04-20 16:37.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.08 [info     ] FQE_20220420163608: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017226366109626237, 'time_algorithm_update': 0.0050280288208362665, 'loss': 0.35479982226523893, 'time_step': 0.005277677330859872, 'init_value': -12.433906555175781, 'ave_value': -10.616007971648663, 'soft_opc': nan} step=10664




2022-04-20 16:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.10 [info     ] FQE_20220420163608: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001723288103591564, 'time_algorithm_update': 0.005136241746503253, 'loss': 0.3631025505804478, 'time_step': 0.005387022744777591, 'init_value': -13.074249267578125, 'ave_value': -10.956126378742592, 'soft_opc': nan} step=11008




2022-04-20 16:37.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.12 [info     ] FQE_20220420163608: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017090245734813602, 'time_algorithm_update': 0.00466920816621115, 'loss': 0.37037977258414895, 'time_step': 0.004919713319734086, 'init_value': -13.568031311035156, 'ave_value': -11.139166027179769, 'soft_opc': nan} step=11352




2022-04-20 16:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.14 [info     ] FQE_20220420163608: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017089067503463392, 'time_algorithm_update': 0.00506646896517554, 'loss': 0.3745035658276445, 'time_step': 0.005315389050993808, 'init_value': -13.93710708618164, 'ave_value': -11.307777899635436, 'soft_opc': nan} step=11696




2022-04-20 16:37.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.16 [info     ] FQE_20220420163608: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016851549924806107, 'time_algorithm_update': 0.005022755889005439, 'loss': 0.3816715817801048, 'time_step': 0.00526842197706533, 'init_value': -14.38105297088623, 'ave_value': -11.462839856255707, 'soft_opc': nan} step=12040




2022-04-20 16:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.18 [info     ] FQE_20220420163608: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001716454361760339, 'time_algorithm_update': 0.005046466755312543, 'loss': 0.38163013294945625, 'time_step': 0.005294716635415721, 'init_value': -14.47059440612793, 'ave_value': -11.396061975072634, 'soft_opc': nan} step=12384




2022-04-20 16:37.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.20 [info     ] FQE_20220420163608: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016598923261775526, 'time_algorithm_update': 0.0045474102330762285, 'loss': 0.3728345055945304, 'time_step': 0.0047889934029690055, 'init_value': -14.764688491821289, 'ave_value': -11.622885199295576, 'soft_opc': nan} step=12728




2022-04-20 16:37.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.22 [info     ] FQE_20220420163608: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001693721427473911, 'time_algorithm_update': 0.005006668872611467, 'loss': 0.37314234867280477, 'time_step': 0.005253020414086275, 'init_value': -14.86683464050293, 'ave_value': -11.538614577905753, 'soft_opc': nan} step=13072




2022-04-20 16:37.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.24 [info     ] FQE_20220420163608: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016957521438598633, 'time_algorithm_update': 0.005099245975183886, 'loss': 0.38127003318825087, 'time_step': 0.005346338416254798, 'init_value': -15.32378101348877, 'ave_value': -11.815745682098173, 'soft_opc': nan} step=13416




2022-04-20 16:37.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.26 [info     ] FQE_20220420163608: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016952669897744822, 'time_algorithm_update': 0.005047297754953074, 'loss': 0.38026564494100246, 'time_step': 0.005293044239975685, 'init_value': -15.493812561035156, 'ave_value': -11.734126765442475, 'soft_opc': nan} step=13760




2022-04-20 16:37.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.28 [info     ] FQE_20220420163608: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016699419465175894, 'time_algorithm_update': 0.0050406449062879695, 'loss': 0.38439181324116195, 'time_step': 0.005282984223476676, 'init_value': -15.836921691894531, 'ave_value': -12.0553699998915, 'soft_opc': nan} step=14104




2022-04-20 16:37.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.29 [info     ] FQE_20220420163608: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001638178215470425, 'time_algorithm_update': 0.003384011429409648, 'loss': 0.38793827866225744, 'time_step': 0.003618451051933821, 'init_value': -15.825457572937012, 'ave_value': -11.964770229867248, 'soft_opc': nan} step=14448




2022-04-20 16:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.31 [info     ] FQE_20220420163608: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016436881797258243, 'time_algorithm_update': 0.003535690002663191, 'loss': 0.38916251079590863, 'time_step': 0.003775424735490666, 'init_value': -16.033809661865234, 'ave_value': -11.880760494394748, 'soft_opc': nan} step=14792




2022-04-20 16:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.32 [info     ] FQE_20220420163608: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016394812007283055, 'time_algorithm_update': 0.003513013900712479, 'loss': 0.39089943304506325, 'time_step': 0.003751181585844173, 'init_value': -16.304052352905273, 'ave_value': -12.147330563021656, 'soft_opc': nan} step=15136




2022-04-20 16:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.33 [info     ] FQE_20220420163608: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016509377679159475, 'time_algorithm_update': 0.0035586204639700956, 'loss': 0.3908331666715703, 'time_step': 0.003799527883529663, 'init_value': -16.356765747070312, 'ave_value': -12.130610335657062, 'soft_opc': nan} step=15480




2022-04-20 16:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.35 [info     ] FQE_20220420163608: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016677102377248365, 'time_algorithm_update': 0.003583701544029768, 'loss': 0.39748342232886963, 'time_step': 0.0038267886915872265, 'init_value': -16.671756744384766, 'ave_value': -12.421563996124785, 'soft_opc': nan} step=15824




2022-04-20 16:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.36 [info     ] FQE_20220420163608: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001666317152422528, 'time_algorithm_update': 0.003522442523823228, 'loss': 0.4084515977261025, 'time_step': 0.0037637131158695662, 'init_value': -17.067363739013672, 'ave_value': -12.85338011917675, 'soft_opc': nan} step=16168




2022-04-20 16:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.38 [info     ] FQE_20220420163608: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016408604244853175, 'time_algorithm_update': 0.0034565967182780422, 'loss': 0.4130770841499704, 'time_step': 0.0036920690259268116, 'init_value': -17.357528686523438, 'ave_value': -13.045415640907647, 'soft_opc': nan} step=16512




2022-04-20 16:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.39 [info     ] FQE_20220420163608: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016538279001102892, 'time_algorithm_update': 0.003543250089467958, 'loss': 0.42310714568444635, 'time_step': 0.003780573606491089, 'init_value': -17.24016761779785, 'ave_value': -12.879035757289076, 'soft_opc': nan} step=16856




2022-04-20 16:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:37.40 [info     ] FQE_20220420163608: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016836163609526878, 'time_algorithm_update': 0.003504837668219278, 'loss': 0.4158142281988592, 'time_step': 0.003750285436940748, 'init_value': -17.29956817626953, 'ave_value': -12.793563566404721, 'soft_opc': nan} step=17200




2022-04-20 16:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163608/model_17200.pt
search iteration:  14
using hyper params:  [0.005714149903860868, 0.009661977818509417, 2.7950952676642017e-05, 7]
2022-04-20 16:37.40 [debug    ] RoundIterator is selected.
2022-04-20 16:37.40 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420163740
2022-04-20 16:37.40 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:37.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:37.41 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:37.41 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0057141499038

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.44 [info     ] TD3PlusBC_20220420163740: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003978495012249863, 'time_algorithm_update': 0.006839512384425827, 'critic_loss': 9.027647707197401, 'actor_loss': 2.646172276714392, 'time_step': 0.007319363237124438, 'td_error': 1.0333923131452623, 'init_value': -11.427597999572754, 'ave_value': -7.244679731801572} step=342
2022-04-20 16:37.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.46 [info     ] TD3PlusBC_20220420163740: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00039833958385980615, 'time_algorithm_update': 0.006810588446276927, 'critic_loss': 6.764085223103128, 'actor_loss': 2.5791324760481626, 'time_step': 0.007287308486581546, 'td_error': 1.2820734877704443, 'init_value': -15.96491813659668, 'ave_value': -10.223863281574738} step=684
2022-04-20 16:37.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.49 [info     ] TD3PlusBC_20220420163740: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0004003361651771947, 'time_algorithm_update': 0.006828781456975212, 'critic_loss': 10.577735785155268, 'actor_loss': 2.5711800550159656, 'time_step': 0.0072993325908281646, 'td_error': 1.6799772071349448, 'init_value': -21.00284194946289, 'ave_value': -13.67450241855522} step=1026
2022-04-20 16:37.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.52 [info     ] TD3PlusBC_20220420163740: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0004042442779094852, 'time_algorithm_update': 0.006899357539171364, 'critic_loss': 14.828564491885448, 'actor_loss': 2.5694291814726, 'time_step': 0.007372223145780507, 'td_error': 2.065562490863673, 'init_value': -26.113941192626953, 'ave_value': -16.967839808418994} step=1368
2022-04-20 16:37.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.55 [info     ] TD3PlusBC_20220420163740: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003976612760309587, 'time_algorithm_update': 0.006854101928354007, 'critic_loss': 19.25040610631307, 'actor_loss': 2.56624896763361, 'time_step': 0.007323937109339307, 'td_error': 2.5562489193800353, 'init_value': -29.9866943359375, 'ave_value': -19.682258646626714} step=1710
2022-04-20 16:37.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.58 [info     ] TD3PlusBC_20220420163740: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00039895166430556985, 'time_algorithm_update': 0.006790015432569716, 'critic_loss': 24.16864538471601, 'actor_loss': 2.564810372235482, 'time_step': 0.007266112935473347, 'td_error': 2.8873510860238167, 'init_value': -34.061668395996094, 'ave_value': -22.395493095637477} step=2052
2022-04-20 16:37.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.01 [info     ] TD3PlusBC_20220420163740: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0004066152182238841, 'time_algorithm_update': 0.006847413659792895, 'critic_loss': 29.04716137835854, 'actor_loss': 2.5633143514220476, 'time_step': 0.007332646358779996, 'td_error': 3.44118219786216, 'init_value': -39.19041442871094, 'ave_value': -25.61421205648288} step=2394
2022-04-20 16:38.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.04 [info     ] TD3PlusBC_20220420163740: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00040198906123289587, 'time_algorithm_update': 0.006774601880569904, 'critic_loss': 34.46820096244589, 'actor_loss': 2.56153010346039, 'time_step': 0.00725144670720686, 'td_error': 4.0387324171065035, 'init_value': -44.43767547607422, 'ave_value': -28.541968552532044} step=2736
2022-04-20 16:38.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.07 [info     ] TD3PlusBC_20220420163740: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00040149549294633477, 'time_algorithm_update': 0.0068615688915141144, 'critic_loss': 40.01288251151816, 'actor_loss': 2.5618470230994865, 'time_step': 0.007337262058815761, 'td_error': 4.625416988686915, 'init_value': -48.04658126831055, 'ave_value': -31.12929641966582} step=3078
2022-04-20 16:38.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.10 [info     ] TD3PlusBC_20220420163740: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00039484277803298327, 'time_algorithm_update': 0.006533263022439522, 'critic_loss': 45.54425913269757, 'actor_loss': 2.560630994930602, 'time_step': 0.007004210823460629, 'td_error': 5.241360674865302, 'init_value': -51.25609588623047, 'ave_value': -33.52660799123682} step=3420
2022-04-20 16:38.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.13 [info     ] TD3PlusBC_20220420163740: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0004010151701363904, 'time_algorithm_update': 0.006833745025054753, 'critic_loss': 51.60549317187036, 'actor_loss': 2.5608624031669214, 'time_step': 0.007311590930871796, 'td_error': 5.759982325220081, 'init_value': -55.16120529174805, 'ave_value': -35.85889045423526} step=3762
2022-04-20 16:38.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.15 [info     ] TD3PlusBC_20220420163740: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0004009593997085304, 'time_algorithm_update': 0.006819237742507667, 'critic_loss': 57.1975558431525, 'actor_loss': 2.561328157346848, 'time_step': 0.0072959326861197485, 'td_error': 6.585742406121005, 'init_value': -59.58009719848633, 'ave_value': -38.68058897464838} step=4104
2022-04-20 16:38.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.18 [info     ] TD3PlusBC_20220420163740: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0004032034622995477, 'time_algorithm_update': 0.006884294643736722, 'critic_loss': 63.035740712929886, 'actor_loss': 2.560278263705516, 'time_step': 0.007362666185836346, 'td_error': 7.008369555014119, 'init_value': -61.595420837402344, 'ave_value': -40.25583949937888} step=4446
2022-04-20 16:38.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.21 [info     ] TD3PlusBC_20220420163740: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004060149889940407, 'time_algorithm_update': 0.006891702350817229, 'critic_loss': 69.31986624734444, 'actor_loss': 2.559851071987933, 'time_step': 0.007376901587547615, 'td_error': 7.6949157076697405, 'init_value': -65.81141662597656, 'ave_value': -42.40688246367331} step=4788
2022-04-20 16:38.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.24 [info     ] TD3PlusBC_20220420163740: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003690266469765825, 'time_algorithm_update': 0.006372225911993729, 'critic_loss': 74.58022290503072, 'actor_loss': 2.5597278820840934, 'time_step': 0.0068135010568719165, 'td_error': 8.367005019157038, 'init_value': -66.85456085205078, 'ave_value': -44.36432000636467} step=5130
2022-04-20 16:38.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.27 [info     ] TD3PlusBC_20220420163740: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034857423681961864, 'time_algorithm_update': 0.006070949877911841, 'critic_loss': 80.3582397929409, 'actor_loss': 2.5606151854085644, 'time_step': 0.006486281316879897, 'td_error': 8.34760932962853, 'init_value': -68.98170471191406, 'ave_value': -45.825630101364204} step=5472
2022-04-20 16:38.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.29 [info     ] TD3PlusBC_20220420163740: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003745778959396987, 'time_algorithm_update': 0.006433429773788006, 'critic_loss': 86.15053051954125, 'actor_loss': 2.560989000643903, 'time_step': 0.006880071428087022, 'td_error': 9.356241936337957, 'init_value': -72.5389175415039, 'ave_value': -47.87199640758386} step=5814
2022-04-20 16:38.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.32 [info     ] TD3PlusBC_20220420163740: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00040105142091449936, 'time_algorithm_update': 0.006855548473826626, 'critic_loss': 91.45346768697102, 'actor_loss': 2.5612654030671593, 'time_step': 0.007333560296666553, 'td_error': 10.243645517725668, 'init_value': -75.29270935058594, 'ave_value': -49.5402333934736} step=6156
2022-04-20 16:38.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.35 [info     ] TD3PlusBC_20220420163740: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0004045844775194313, 'time_algorithm_update': 0.006887253264934696, 'critic_loss': 97.4730961336727, 'actor_loss': 2.5627286420230977, 'time_step': 0.007369988145884018, 'td_error': 10.588204888267239, 'init_value': -76.13042449951172, 'ave_value': -51.15558934214416} step=6498
2022-04-20 16:38.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.38 [info     ] TD3PlusBC_20220420163740: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00040213476147568015, 'time_algorithm_update': 0.006886658612747639, 'critic_loss': 101.98226919787669, 'actor_loss': 2.5635116365220814, 'time_step': 0.007354748876471268, 'td_error': 10.7732184496295, 'init_value': -75.95699310302734, 'ave_value': -52.06033029600474} step=6840
2022-04-20 16:38.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.41 [info     ] TD3PlusBC_20220420163740: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00039883803205880504, 'time_algorithm_update': 0.006833783367223907, 'critic_loss': 107.27526143838091, 'actor_loss': 2.564734182859722, 'time_step': 0.007296854292440136, 'td_error': 11.907530360393412, 'init_value': -76.67684173583984, 'ave_value': -53.165462978480235} step=7182
2022-04-20 16:38.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.44 [info     ] TD3PlusBC_20220420163740: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003967494295354475, 'time_algorithm_update': 0.006871035921643352, 'critic_loss': 111.87890182182802, 'actor_loss': 2.5655719285819965, 'time_step': 0.007331024833589966, 'td_error': 11.551175348800149, 'init_value': -78.11964416503906, 'ave_value': -54.38502823283057} step=7524
2022-04-20 16:38.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.47 [info     ] TD3PlusBC_20220420163740: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00040356666721098606, 'time_algorithm_update': 0.006870284415127938, 'critic_loss': 117.16027833146659, 'actor_loss': 2.566056708843387, 'time_step': 0.007343198820861459, 'td_error': 13.112288867360807, 'init_value': -80.87709045410156, 'ave_value': -55.960683212991434} step=7866
2022-04-20 16:38.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.50 [info     ] TD3PlusBC_20220420163740: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00040006776999311836, 'time_algorithm_update': 0.006848612723991885, 'critic_loss': 121.52958587735716, 'actor_loss': 2.565689471730015, 'time_step': 0.00731211935567577, 'td_error': 13.336203692711765, 'init_value': -80.23451232910156, 'ave_value': -56.90035824887069} step=8208
2022-04-20 16:38.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.53 [info     ] TD3PlusBC_20220420163740: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00040315675456621496, 'time_algorithm_update': 0.006887387811091908, 'critic_loss': 124.95789870323493, 'actor_loss': 2.566922211507608, 'time_step': 0.007354248336881225, 'td_error': 13.518835857270306, 'init_value': -82.42543029785156, 'ave_value': -58.498501100595035} step=8550
2022-04-20 16:38.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.56 [info     ] TD3PlusBC_20220420163740: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00040061501731649475, 'time_algorithm_update': 0.006858482695462411, 'critic_loss': 128.7098758987516, 'actor_loss': 2.56649324907894, 'time_step': 0.007322282121892561, 'td_error': 13.257930486598624, 'init_value': -80.77561950683594, 'ave_value': -58.40202654574939} step=8892
2022-04-20 16:38.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:38.58 [info     ] TD3PlusBC_20220420163740: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003713690049467031, 'time_algorithm_update': 0.006432701969704433, 'critic_loss': 133.09854810837416, 'actor_loss': 2.567575983136718, 'time_step': 0.0068622318624753, 'td_error': 15.1517913005994, 'init_value': -84.0546646118164, 'ave_value': -59.62679903314726} step=9234
2022-04-20 16:38.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.01 [info     ] TD3PlusBC_20220420163740: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034776486848530014, 'time_algorithm_update': 0.006103620891682586, 'critic_loss': 136.66770754362406, 'actor_loss': 2.5671268541213363, 'time_step': 0.006510430609273632, 'td_error': 15.412921237766135, 'init_value': -81.74147033691406, 'ave_value': -59.95269353919268} step=9576
2022-04-20 16:39.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.04 [info     ] TD3PlusBC_20220420163740: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00038040520852072196, 'time_algorithm_update': 0.006642437120627242, 'critic_loss': 139.66394156740424, 'actor_loss': 2.5685009761163364, 'time_step': 0.007083197783308419, 'td_error': 14.65055278655979, 'init_value': -82.36241149902344, 'ave_value': -60.889488103130645} step=9918
2022-04-20 16:39.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.07 [info     ] TD3PlusBC_20220420163740: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003987927185861688, 'time_algorithm_update': 0.006873058296783626, 'critic_loss': 143.0638023510314, 'actor_loss': 2.568539583195023, 'time_step': 0.0073353337962725005, 'td_error': 14.84253266428612, 'init_value': -84.80703735351562, 'ave_value': -62.1431328089151} step=10260
2022-04-20 16:39.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.10 [info     ] TD3PlusBC_20220420163740: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0004016920837045413, 'time_algorithm_update': 0.006743654870150382, 'critic_loss': 146.00959766677946, 'actor_loss': 2.5678473885296382, 'time_step': 0.007207033926980537, 'td_error': 15.108007492514323, 'init_value': -84.27813720703125, 'ave_value': -62.55826218831093} step=10602
2022-04-20 16:39.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.13 [info     ] TD3PlusBC_20220420163740: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0004058072441502621, 'time_algorithm_update': 0.006957061806617424, 'critic_loss': 148.76826048733895, 'actor_loss': 2.568498891696595, 'time_step': 0.007426503108955963, 'td_error': 15.767467349650968, 'init_value': -86.14796447753906, 'ave_value': -63.497459694413415} step=10944
2022-04-20 16:39.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.16 [info     ] TD3PlusBC_20220420163740: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003982064319632904, 'time_algorithm_update': 0.006786561151694136, 'critic_loss': 151.7262021561115, 'actor_loss': 2.5686166872058, 'time_step': 0.007245369124830815, 'td_error': 15.779632565273776, 'init_value': -85.19867706298828, 'ave_value': -63.9152168800165} step=11286
2022-04-20 16:39.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.19 [info     ] TD3PlusBC_20220420163740: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00039550017195138316, 'time_algorithm_update': 0.0068858722497148125, 'critic_loss': 154.35727834422687, 'actor_loss': 2.5675335599665057, 'time_step': 0.007345458917450486, 'td_error': 15.77820875826019, 'init_value': -85.91223907470703, 'ave_value': -64.59584790971684} step=11628
2022-04-20 16:39.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.21 [info     ] TD3PlusBC_20220420163740: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003960041971931681, 'time_algorithm_update': 0.00679299775619953, 'critic_loss': 156.25091916357565, 'actor_loss': 2.5683027108510337, 'time_step': 0.007252017656962077, 'td_error': 15.72354923491341, 'init_value': -84.93492126464844, 'ave_value': -64.50066021962645} step=11970
2022-04-20 16:39.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.24 [info     ] TD3PlusBC_20220420163740: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003945973881503992, 'time_algorithm_update': 0.0068540008444535104, 'critic_loss': 158.72164013511255, 'actor_loss': 2.56769873803122, 'time_step': 0.0073066636135703635, 'td_error': 16.496012754546296, 'init_value': -87.63191986083984, 'ave_value': -65.67697702868632} step=12312
2022-04-20 16:39.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.27 [info     ] TD3PlusBC_20220420163740: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00039966064586974026, 'time_algorithm_update': 0.006903029324715598, 'critic_loss': 160.49397768611797, 'actor_loss': 2.567696986839785, 'time_step': 0.007366513648228339, 'td_error': 16.31340700456658, 'init_value': -86.18429565429688, 'ave_value': -65.81827212240832} step=12654
2022-04-20 16:39.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.30 [info     ] TD3PlusBC_20220420163740: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003978745979175233, 'time_algorithm_update': 0.006734566381800244, 'critic_loss': 162.61663309733072, 'actor_loss': 2.5679041084490324, 'time_step': 0.0071949735719558094, 'td_error': 16.30056946559466, 'init_value': -83.43624114990234, 'ave_value': -65.83547693550705} step=12996
2022-04-20 16:39.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.34 [info     ] TD3PlusBC_20220420163740: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00040111625403688666, 'time_algorithm_update': 0.008245777665523061, 'critic_loss': 163.88964600590936, 'actor_loss': 2.56786488789564, 'time_step': 0.008707457118564181, 'td_error': 16.394575811383284, 'init_value': -85.92501068115234, 'ave_value': -66.81164295366068} step=13338
2022-04-20 16:39.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.37 [info     ] TD3PlusBC_20220420163740: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00040216194955926195, 'time_algorithm_update': 0.009001688650477003, 'critic_loss': 165.87458187794826, 'actor_loss': 2.568502176574796, 'time_step': 0.009468579152871293, 'td_error': 16.587571340901206, 'init_value': -86.7156982421875, 'ave_value': -67.2285370738264} step=13680
2022-04-20 16:39.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.41 [info     ] TD3PlusBC_20220420163740: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00040107582047668814, 'time_algorithm_update': 0.008502115283096046, 'critic_loss': 167.64214191102144, 'actor_loss': 2.567782618148982, 'time_step': 0.008962491799516288, 'td_error': 16.8412172301929, 'init_value': -85.89756774902344, 'ave_value': -67.50793733977554} step=14022
2022-04-20 16:39.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.45 [info     ] TD3PlusBC_20220420163740: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003987529821563185, 'time_algorithm_update': 0.008850512448807208, 'critic_loss': 168.65874755591676, 'actor_loss': 2.5677116391254446, 'time_step': 0.009319743217780577, 'td_error': 16.43189022580177, 'init_value': -83.61803436279297, 'ave_value': -67.36483888768734} step=14364
2022-04-20 16:39.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.48 [info     ] TD3PlusBC_20220420163740: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00040025878370853894, 'time_algorithm_update': 0.00899367374286317, 'critic_loss': 169.82004897357427, 'actor_loss': 2.567181286058928, 'time_step': 0.009453026174801833, 'td_error': 17.420620749036377, 'init_value': -88.49127197265625, 'ave_value': -68.64901338868934} step=14706
2022-04-20 16:39.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.52 [info     ] TD3PlusBC_20220420163740: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0004011629617702194, 'time_algorithm_update': 0.008552243137917323, 'critic_loss': 171.30929413734123, 'actor_loss': 2.5684470140446, 'time_step': 0.009018534805342468, 'td_error': 17.98515138234369, 'init_value': -88.34910583496094, 'ave_value': -69.13242386185657} step=15048
2022-04-20 16:39.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.56 [info     ] TD3PlusBC_20220420163740: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0004055827681781256, 'time_algorithm_update': 0.008939374957168312, 'critic_loss': 172.0871605900993, 'actor_loss': 2.5675054237856503, 'time_step': 0.009406660732470061, 'td_error': 16.873907016729614, 'init_value': -86.10420227050781, 'ave_value': -68.71511365677372} step=15390
2022-04-20 16:39.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.59 [info     ] TD3PlusBC_20220420163740: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00040321391925477145, 'time_algorithm_update': 0.008456946116441872, 'critic_loss': 172.83736691837422, 'actor_loss': 2.5682395062251397, 'time_step': 0.00892743938847592, 'td_error': 19.036205612167233, 'init_value': -86.69105529785156, 'ave_value': -69.49756545503968} step=15732
2022-04-20 16:39.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.03 [info     ] TD3PlusBC_20220420163740: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004045063989204273, 'time_algorithm_update': 0.008957012354979041, 'critic_loss': 173.516178844965, 'actor_loss': 2.567737690886559, 'time_step': 0.009426492696617082, 'td_error': 18.525013437793554, 'init_value': -90.10489654541016, 'ave_value': -70.31530744126117} step=16074
2022-04-20 16:40.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.07 [info     ] TD3PlusBC_20220420163740: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00040145645364683273, 'time_algorithm_update': 0.008871700331481576, 'critic_loss': 174.65763167888798, 'actor_loss': 2.5674997937609576, 'time_step': 0.009345691803603144, 'td_error': 17.502925403933947, 'init_value': -84.72276306152344, 'ave_value': -69.17529453654815} step=16416
2022-04-20 16:40.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.11 [info     ] TD3PlusBC_20220420163740: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00040069658156724003, 'time_algorithm_update': 0.00859682950360036, 'critic_loss': 175.91742409600153, 'actor_loss': 2.567367760061521, 'time_step': 0.009063498318543908, 'td_error': 19.00012237891855, 'init_value': -90.22795104980469, 'ave_value': -70.51963320314817} step=16758
2022-04-20 16:40.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.14 [info     ] TD3PlusBC_20220420163740: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003980537604170236, 'time_algorithm_update': 0.00884487824133265, 'critic_loss': 176.76373621176558, 'actor_loss': 2.5681722930997437, 'time_step': 0.009314412959137855, 'td_error': 18.871504025186095, 'init_value': -86.92842864990234, 'ave_value': -70.19637447998129} step=17100
2022-04-20 16:40.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420163740/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:40.15 [info     ] FQE_20220420164014: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016419427940644413, 'time_algorithm_update': 0.005098503756235881, 'loss': 0.00845978991689542, 'time_step': 0.005336613540189812, 'init_value': -0.35398510098457336, 'ave_value': -0.28689430362570123, 'soft_opc': nan} step=166




2022-04-20 16:40.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.16 [info     ] FQE_20220420164014: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001633756132010954, 'time_algorithm_update': 0.0050144971135150955, 'loss': 0.0064885379502504885, 'time_step': 0.005252458963049464, 'init_value': -0.5292735695838928, 'ave_value': -0.4028035087276015, 'soft_opc': nan} step=332




2022-04-20 16:40.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.17 [info     ] FQE_20220420164014: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001590510448777532, 'time_algorithm_update': 0.004379981971648802, 'loss': 0.005440186723191516, 'time_step': 0.004608023597533445, 'init_value': -0.5738295316696167, 'ave_value': -0.41885595983649426, 'soft_opc': nan} step=498




2022-04-20 16:40.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.18 [info     ] FQE_20220420164014: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016085211052952042, 'time_algorithm_update': 0.004876926720860493, 'loss': 0.005302977962812117, 'time_step': 0.005112039037497647, 'init_value': -0.6482791900634766, 'ave_value': -0.4596728116911542, 'soft_opc': nan} step=664




2022-04-20 16:40.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.19 [info     ] FQE_20220420164014: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016157023877982633, 'time_algorithm_update': 0.005100461373846215, 'loss': 0.00489576564153589, 'time_step': 0.00533797080258289, 'init_value': -0.7324849367141724, 'ave_value': -0.5131558109376881, 'soft_opc': nan} step=830




2022-04-20 16:40.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.20 [info     ] FQE_20220420164014: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016469696918165828, 'time_algorithm_update': 0.0050392265779426295, 'loss': 0.004634624972647752, 'time_step': 0.005273477140679417, 'init_value': -0.7473771572113037, 'ave_value': -0.5275547528706558, 'soft_opc': nan} step=996




2022-04-20 16:40.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.21 [info     ] FQE_20220420164014: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016271780772381518, 'time_algorithm_update': 0.005067176129444536, 'loss': 0.0044962060259071636, 'time_step': 0.005306048565600292, 'init_value': -0.75468909740448, 'ave_value': -0.5159638393468954, 'soft_opc': nan} step=1162




2022-04-20 16:40.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.22 [info     ] FQE_20220420164014: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016008227704519248, 'time_algorithm_update': 0.005025149827980134, 'loss': 0.004341177437566669, 'time_step': 0.005258565925690065, 'init_value': -0.8261865973472595, 'ave_value': -0.5702029832982802, 'soft_opc': nan} step=1328




2022-04-20 16:40.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.23 [info     ] FQE_20220420164014: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016201260578201478, 'time_algorithm_update': 0.005074384700821106, 'loss': 0.004314438773723072, 'time_step': 0.005308030599571136, 'init_value': -0.838309109210968, 'ave_value': -0.5643804306348971, 'soft_opc': nan} step=1494




2022-04-20 16:40.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.24 [info     ] FQE_20220420164014: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016908186027802616, 'time_algorithm_update': 0.004974310656627977, 'loss': 0.0047012879534243015, 'time_step': 0.005215972302907921, 'init_value': -0.8569202423095703, 'ave_value': -0.5736370624504513, 'soft_opc': nan} step=1660




2022-04-20 16:40.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.25 [info     ] FQE_20220420164014: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00018268751810832196, 'time_algorithm_update': 0.00503957989704178, 'loss': 0.004637238034467965, 'time_step': 0.005299003727464791, 'init_value': -0.9384872913360596, 'ave_value': -0.6558345430203386, 'soft_opc': nan} step=1826




2022-04-20 16:40.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.26 [info     ] FQE_20220420164014: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016554436051701926, 'time_algorithm_update': 0.0049625089369624495, 'loss': 0.004856436841571367, 'time_step': 0.005200538290552346, 'init_value': -0.9260624647140503, 'ave_value': -0.6428315409383661, 'soft_opc': nan} step=1992




2022-04-20 16:40.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.27 [info     ] FQE_20220420164014: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015849952238151827, 'time_algorithm_update': 0.004253554056925946, 'loss': 0.00536206061250519, 'time_step': 0.004483498722673899, 'init_value': -0.9807014465332031, 'ave_value': -0.6897256561571748, 'soft_opc': nan} step=2158




2022-04-20 16:40.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.28 [info     ] FQE_20220420164014: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016256556453475034, 'time_algorithm_update': 0.005092738622642425, 'loss': 0.005978146431187774, 'time_step': 0.005328648061637419, 'init_value': -1.028611660003662, 'ave_value': -0.7346704000095325, 'soft_opc': nan} step=2324




2022-04-20 16:40.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.29 [info     ] FQE_20220420164014: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016403485493487622, 'time_algorithm_update': 0.005114634352994253, 'loss': 0.006280630148094462, 'time_step': 0.005350775029285845, 'init_value': -1.0494132041931152, 'ave_value': -0.7225271636368455, 'soft_opc': nan} step=2490




2022-04-20 16:40.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.30 [info     ] FQE_20220420164014: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016555585056902413, 'time_algorithm_update': 0.00505439201033259, 'loss': 0.00744477124935218, 'time_step': 0.005290817065411304, 'init_value': -1.1263151168823242, 'ave_value': -0.7699344898867715, 'soft_opc': nan} step=2656




2022-04-20 16:40.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.31 [info     ] FQE_20220420164014: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016466824405164603, 'time_algorithm_update': 0.0049313967486461965, 'loss': 0.007831390433754009, 'time_step': 0.005168618926082749, 'init_value': -1.0840628147125244, 'ave_value': -0.715760677417757, 'soft_opc': nan} step=2822




2022-04-20 16:40.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.31 [info     ] FQE_20220420164014: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001718322914766978, 'time_algorithm_update': 0.005097169473946813, 'loss': 0.009011702788769302, 'time_step': 0.0053439413208559335, 'init_value': -1.2156208753585815, 'ave_value': -0.801412869131545, 'soft_opc': nan} step=2988




2022-04-20 16:40.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.32 [info     ] FQE_20220420164014: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001602776079292757, 'time_algorithm_update': 0.005070647561406514, 'loss': 0.009856254640119487, 'time_step': 0.005303612674575254, 'init_value': -1.2764277458190918, 'ave_value': -0.8535025853529439, 'soft_opc': nan} step=3154




2022-04-20 16:40.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.33 [info     ] FQE_20220420164014: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016742011150681828, 'time_algorithm_update': 0.005009384040372917, 'loss': 0.010597902864879783, 'time_step': 0.005256866834249841, 'init_value': -1.392751693725586, 'ave_value': -0.9415269831531086, 'soft_opc': nan} step=3320




2022-04-20 16:40.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.34 [info     ] FQE_20220420164014: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001659896000322089, 'time_algorithm_update': 0.005176469504115093, 'loss': 0.011700776341113162, 'time_step': 0.005418313554970615, 'init_value': -1.392059087753296, 'ave_value': -0.9029186833280701, 'soft_opc': nan} step=3486




2022-04-20 16:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.35 [info     ] FQE_20220420164014: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015840616570897848, 'time_algorithm_update': 0.0039789892104734855, 'loss': 0.0130217999369972, 'time_step': 0.00421121465154441, 'init_value': -1.4418994188308716, 'ave_value': -0.954280778270651, 'soft_opc': nan} step=3652




2022-04-20 16:40.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.36 [info     ] FQE_20220420164014: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001633038003760648, 'time_algorithm_update': 0.005083762019513601, 'loss': 0.013403104250010732, 'time_step': 0.0053215701896024035, 'init_value': -1.585724115371704, 'ave_value': -1.0680060404176648, 'soft_opc': nan} step=3818




2022-04-20 16:40.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.37 [info     ] FQE_20220420164014: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001710179340408509, 'time_algorithm_update': 0.00503978959049087, 'loss': 0.014491818393636719, 'time_step': 0.005286162158092821, 'init_value': -1.613312005996704, 'ave_value': -1.0588559931295143, 'soft_opc': nan} step=3984




2022-04-20 16:40.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.38 [info     ] FQE_20220420164014: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001633540693535862, 'time_algorithm_update': 0.00509674003325313, 'loss': 0.015766031883028615, 'time_step': 0.00533287640077522, 'init_value': -1.6781601905822754, 'ave_value': -1.108003176335111, 'soft_opc': nan} step=4150




2022-04-20 16:40.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.39 [info     ] FQE_20220420164014: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001617124281733869, 'time_algorithm_update': 0.005126723323959902, 'loss': 0.016643744572879274, 'time_step': 0.005361282681844321, 'init_value': -1.6755417585372925, 'ave_value': -1.0777119289513106, 'soft_opc': nan} step=4316




2022-04-20 16:40.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.40 [info     ] FQE_20220420164014: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016549121902649662, 'time_algorithm_update': 0.005158583801912974, 'loss': 0.0183152014863711, 'time_step': 0.00539703397865755, 'init_value': -1.8002300262451172, 'ave_value': -1.1819865730104482, 'soft_opc': nan} step=4482




2022-04-20 16:40.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.41 [info     ] FQE_20220420164014: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016402623739587255, 'time_algorithm_update': 0.005074101758290486, 'loss': 0.019964184463360762, 'time_step': 0.005314406142177352, 'init_value': -1.8462458848953247, 'ave_value': -1.204727529331639, 'soft_opc': nan} step=4648




2022-04-20 16:40.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.42 [info     ] FQE_20220420164014: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016786535102200796, 'time_algorithm_update': 0.005087391439690648, 'loss': 0.020785449168796325, 'time_step': 0.005329738180321383, 'init_value': -1.9082025289535522, 'ave_value': -1.2487604205370755, 'soft_opc': nan} step=4814




2022-04-20 16:40.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.43 [info     ] FQE_20220420164014: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001643005623874894, 'time_algorithm_update': 0.0050919975142881095, 'loss': 0.022237027361315775, 'time_step': 0.005329366189887725, 'init_value': -1.98136305809021, 'ave_value': -1.3026329405085595, 'soft_opc': nan} step=4980




2022-04-20 16:40.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.44 [info     ] FQE_20220420164014: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001602402652602598, 'time_algorithm_update': 0.004081401480249612, 'loss': 0.023923187731226616, 'time_step': 0.004314109503504741, 'init_value': -2.075448989868164, 'ave_value': -1.3593189994393438, 'soft_opc': nan} step=5146




2022-04-20 16:40.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.45 [info     ] FQE_20220420164014: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001615285873413086, 'time_algorithm_update': 0.005034959459879312, 'loss': 0.02502480913963483, 'time_step': 0.0052706376615777074, 'init_value': -2.225132942199707, 'ave_value': -1.488959188449725, 'soft_opc': nan} step=5312




2022-04-20 16:40.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.46 [info     ] FQE_20220420164014: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001643608851605151, 'time_algorithm_update': 0.005109140671879412, 'loss': 0.026802713414008944, 'time_step': 0.005343899669417416, 'init_value': -2.1725399494171143, 'ave_value': -1.4274528205663235, 'soft_opc': nan} step=5478




2022-04-20 16:40.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.47 [info     ] FQE_20220420164014: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016459499497011485, 'time_algorithm_update': 0.00513129780091435, 'loss': 0.027862496509957296, 'time_step': 0.0053720818944724205, 'init_value': -2.1900806427001953, 'ave_value': -1.4516846756517954, 'soft_opc': nan} step=5644




2022-04-20 16:40.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.48 [info     ] FQE_20220420164014: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016693465680961148, 'time_algorithm_update': 0.0050636084682970164, 'loss': 0.028562431641663593, 'time_step': 0.005306141922272831, 'init_value': -2.4253900051116943, 'ave_value': -1.6614763390239295, 'soft_opc': nan} step=5810




2022-04-20 16:40.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.49 [info     ] FQE_20220420164014: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016767863767692842, 'time_algorithm_update': 0.0050471231161829935, 'loss': 0.029903823134618396, 'time_step': 0.005290208092655044, 'init_value': -2.425161361694336, 'ave_value': -1.6747519136876339, 'soft_opc': nan} step=5976




2022-04-20 16:40.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.50 [info     ] FQE_20220420164014: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016549696405249905, 'time_algorithm_update': 0.005096125315470868, 'loss': 0.031777434922047855, 'time_step': 0.005334676030170487, 'init_value': -2.483489513397217, 'ave_value': -1.711935501753747, 'soft_opc': nan} step=6142




2022-04-20 16:40.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.51 [info     ] FQE_20220420164014: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016509337597582713, 'time_algorithm_update': 0.004948305796427899, 'loss': 0.032936661723013745, 'time_step': 0.005184651857399079, 'init_value': -2.5755698680877686, 'ave_value': -1.8000859099297641, 'soft_opc': nan} step=6308




2022-04-20 16:40.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.51 [info     ] FQE_20220420164014: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016544525881847703, 'time_algorithm_update': 0.005061895014291786, 'loss': 0.03335998614573941, 'time_step': 0.005302256848438677, 'init_value': -2.566179037094116, 'ave_value': -1.7959852990786638, 'soft_opc': nan} step=6474




2022-04-20 16:40.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.52 [info     ] FQE_20220420164014: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016100291746208467, 'time_algorithm_update': 0.00435941190604704, 'loss': 0.03366411272899236, 'time_step': 0.004591460687568389, 'init_value': -2.6311168670654297, 'ave_value': -1.8463819091418037, 'soft_opc': nan} step=6640




2022-04-20 16:40.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.53 [info     ] FQE_20220420164014: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016824165022516824, 'time_algorithm_update': 0.005227360380701272, 'loss': 0.034869573443724086, 'time_step': 0.005467825625316206, 'init_value': -2.5725326538085938, 'ave_value': -1.7837518726249, 'soft_opc': nan} step=6806




2022-04-20 16:40.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.54 [info     ] FQE_20220420164014: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001662912138973374, 'time_algorithm_update': 0.005015683461384601, 'loss': 0.037611579765139305, 'time_step': 0.005260964474046087, 'init_value': -2.767423629760742, 'ave_value': -1.9694465236385932, 'soft_opc': nan} step=6972




2022-04-20 16:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.55 [info     ] FQE_20220420164014: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001669246030141072, 'time_algorithm_update': 0.004972594330109745, 'loss': 0.03828960471135755, 'time_step': 0.005211716674896608, 'init_value': -2.775808334350586, 'ave_value': -2.001995998342429, 'soft_opc': nan} step=7138




2022-04-20 16:40.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.56 [info     ] FQE_20220420164014: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001661935484552958, 'time_algorithm_update': 0.005013178630047534, 'loss': 0.03834528494303687, 'time_step': 0.005254110658025167, 'init_value': -2.8011422157287598, 'ave_value': -1.9889729014362905, 'soft_opc': nan} step=7304




2022-04-20 16:40.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.57 [info     ] FQE_20220420164014: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016772316162844738, 'time_algorithm_update': 0.0050400308815829725, 'loss': 0.03933779949305512, 'time_step': 0.00528186487864299, 'init_value': -2.8304717540740967, 'ave_value': -2.0054006622558846, 'soft_opc': nan} step=7470




2022-04-20 16:40.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.58 [info     ] FQE_20220420164014: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016222804425710654, 'time_algorithm_update': 0.005114884261625359, 'loss': 0.04154279160226347, 'time_step': 0.005352966756705779, 'init_value': -2.777709484100342, 'ave_value': -1.9709539075826739, 'soft_opc': nan} step=7636




2022-04-20 16:40.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:40.59 [info     ] FQE_20220420164014: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016952997230621706, 'time_algorithm_update': 0.005035628755408597, 'loss': 0.042733724349660863, 'time_step': 0.005278975130563759, 'init_value': -2.9297103881835938, 'ave_value': -2.103688094874861, 'soft_opc': nan} step=7802




2022-04-20 16:40.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:41.00 [info     ] FQE_20220420164014: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016556590436452842, 'time_algorithm_update': 0.0049821957048163355, 'loss': 0.04284150774677632, 'time_step': 0.005223310137369546, 'init_value': -2.965050220489502, 'ave_value': -2.1325970842880575, 'soft_opc': nan} step=7968




2022-04-20 16:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:41.01 [info     ] FQE_20220420164014: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017021075788750705, 'time_algorithm_update': 0.004930402859147772, 'loss': 0.04507189336042088, 'time_step': 0.005173635770039386, 'init_value': -2.9705448150634766, 'ave_value': -2.1450207766078346, 'soft_opc': nan} step=8134




2022-04-20 16:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:41.02 [info     ] FQE_20220420164014: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016431492495249552, 'time_algorithm_update': 0.004488462425140013, 'loss': 0.04573053302852078, 'time_step': 0.0047269959047616245, 'init_value': -3.0002381801605225, 'ave_value': -2.165692920534796, 'soft_opc': nan} step=8300




2022-04-20 16:41.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164014/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:41.02 [debug    ] RoundIterator is selected.
2022-04-20 16:41.02 [info     ] Directory is created at d3rlpy_logs/FQE_20220420164102
2022-04-20 16:41.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:41.02 [debug    ] Building models...
2022-04-20 16:41.02 [debug    ] Models have been built.
2022-04-20 16:41.02 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420164102/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:41.04 [info     ] FQE_20220420164102: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016892025637072185, 'time_algorithm_update': 0.005069321671197581, 'loss': 0.030582860661778857, 'time_step': 0.005313489326210909, 'init_value': -1.0529484748840332, 'ave_value': -1.0651807704084628, 'soft_opc': nan} step=344




2022-04-20 16:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.06 [info     ] FQE_20220420164102: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017152622688648312, 'time_algorithm_update': 0.005095768113468968, 'loss': 0.026032122916022186, 'time_step': 0.005341198555258817, 'init_value': -1.6584864854812622, 'ave_value': -1.659832975812055, 'soft_opc': nan} step=688




2022-04-20 16:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.08 [info     ] FQE_20220420164102: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017115057900894519, 'time_algorithm_update': 0.005067507887995521, 'loss': 0.029744551546850004, 'time_step': 0.005312697138897208, 'init_value': -2.5194642543792725, 'ave_value': -2.4795641895551404, 'soft_opc': nan} step=1032




2022-04-20 16:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.10 [info     ] FQE_20220420164102: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.000167414199474246, 'time_algorithm_update': 0.004606099322784779, 'loss': 0.03424335087833623, 'time_step': 0.0048471779324287595, 'init_value': -3.0684642791748047, 'ave_value': -2.9653237510640342, 'soft_opc': nan} step=1376




2022-04-20 16:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.12 [info     ] FQE_20220420164102: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001693880835244822, 'time_algorithm_update': 0.005102681559185649, 'loss': 0.04388677830493814, 'time_step': 0.005349218152290167, 'init_value': -3.7110605239868164, 'ave_value': -3.54401676498085, 'soft_opc': nan} step=1720




2022-04-20 16:41.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.14 [info     ] FQE_20220420164102: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017178959624711856, 'time_algorithm_update': 0.004986538443454477, 'loss': 0.05333784994734234, 'time_step': 0.005235989426457604, 'init_value': -4.208106994628906, 'ave_value': -3.9494233637276266, 'soft_opc': nan} step=2064




2022-04-20 16:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.16 [info     ] FQE_20220420164102: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017252356507057367, 'time_algorithm_update': 0.005091883415399596, 'loss': 0.06779580066240458, 'time_step': 0.0053425209466801135, 'init_value': -4.764625549316406, 'ave_value': -4.440516178603578, 'soft_opc': nan} step=2408




2022-04-20 16:41.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.18 [info     ] FQE_20220420164102: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017395407654518304, 'time_algorithm_update': 0.0050666103529375655, 'loss': 0.08411976417288357, 'time_step': 0.005318760871887207, 'init_value': -5.203261852264404, 'ave_value': -4.796145019908897, 'soft_opc': nan} step=2752




2022-04-20 16:41.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.20 [info     ] FQE_20220420164102: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001697380875432214, 'time_algorithm_update': 0.0045622711957887165, 'loss': 0.10184408444911242, 'time_step': 0.004811884358871815, 'init_value': -5.536203384399414, 'ave_value': -5.067908270299636, 'soft_opc': nan} step=3096




2022-04-20 16:41.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.22 [info     ] FQE_20220420164102: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.000172563070474669, 'time_algorithm_update': 0.005116947861604913, 'loss': 0.12155895826385119, 'time_step': 0.0053645919921786285, 'init_value': -5.968081951141357, 'ave_value': -5.4606686495361965, 'soft_opc': nan} step=3440




2022-04-20 16:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.24 [info     ] FQE_20220420164102: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017031126244123593, 'time_algorithm_update': 0.005076551160147024, 'loss': 0.14256744706180208, 'time_step': 0.005321709915649059, 'init_value': -6.241259574890137, 'ave_value': -5.642534136249231, 'soft_opc': nan} step=3784




2022-04-20 16:41.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.26 [info     ] FQE_20220420164102: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001713252344796824, 'time_algorithm_update': 0.0050837175790653675, 'loss': 0.16794387449856935, 'time_step': 0.0053308550701584925, 'init_value': -6.841515064239502, 'ave_value': -6.284332741684331, 'soft_opc': nan} step=4128




2022-04-20 16:41.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.28 [info     ] FQE_20220420164102: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016991828763207725, 'time_algorithm_update': 0.0046318159546963, 'loss': 0.19132747633252725, 'time_step': 0.004878042742263439, 'init_value': -6.81703519821167, 'ave_value': -6.272293615464943, 'soft_opc': nan} step=4472




2022-04-20 16:41.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.30 [info     ] FQE_20220420164102: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017054413640221885, 'time_algorithm_update': 0.005092208468636801, 'loss': 0.21265332681278504, 'time_step': 0.005339658537576365, 'init_value': -6.900839328765869, 'ave_value': -6.37831695528852, 'soft_opc': nan} step=4816




2022-04-20 16:41.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.32 [info     ] FQE_20220420164102: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017217148182003996, 'time_algorithm_update': 0.005095925442008085, 'loss': 0.23326513292484505, 'time_step': 0.005344900280930275, 'init_value': -7.052400588989258, 'ave_value': -6.479226834361382, 'soft_opc': nan} step=5160




2022-04-20 16:41.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.34 [info     ] FQE_20220420164102: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.000171587910763053, 'time_algorithm_update': 0.005099205776702526, 'loss': 0.2573587344526205, 'time_step': 0.005348066257876019, 'init_value': -7.67729377746582, 'ave_value': -7.084770557331341, 'soft_opc': nan} step=5504




2022-04-20 16:41.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.36 [info     ] FQE_20220420164102: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017274119133173034, 'time_algorithm_update': 0.005088242680527443, 'loss': 0.27582148591403005, 'time_step': 0.005337389402611311, 'init_value': -7.666958808898926, 'ave_value': -7.064657921359774, 'soft_opc': nan} step=5848




2022-04-20 16:41.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.38 [info     ] FQE_20220420164102: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016908728799154592, 'time_algorithm_update': 0.004761134469231894, 'loss': 0.2858141148733625, 'time_step': 0.005004638156225515, 'init_value': -8.120756149291992, 'ave_value': -7.547614880946574, 'soft_opc': nan} step=6192




2022-04-20 16:41.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.40 [info     ] FQE_20220420164102: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017044364019881849, 'time_algorithm_update': 0.005162482344826987, 'loss': 0.31148077696994986, 'time_step': 0.005408489426901174, 'init_value': -8.517702102661133, 'ave_value': -7.911342470653079, 'soft_opc': nan} step=6536




2022-04-20 16:41.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.42 [info     ] FQE_20220420164102: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017064532568288404, 'time_algorithm_update': 0.00506618341734243, 'loss': 0.3218777408271075, 'time_step': 0.005311569502187329, 'init_value': -8.87239933013916, 'ave_value': -8.300589447610525, 'soft_opc': nan} step=6880




2022-04-20 16:41.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.44 [info     ] FQE_20220420164102: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016882807709449945, 'time_algorithm_update': 0.005062860804934835, 'loss': 0.3380339738999515, 'time_step': 0.005310186119966729, 'init_value': -9.112648963928223, 'ave_value': -8.59250592407393, 'soft_opc': nan} step=7224




2022-04-20 16:41.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.45 [info     ] FQE_20220420164102: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001695426397545393, 'time_algorithm_update': 0.004624593396519505, 'loss': 0.35242178370136507, 'time_step': 0.004869228878686595, 'init_value': -9.221845626831055, 'ave_value': -8.645490824727359, 'soft_opc': nan} step=7568




2022-04-20 16:41.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.47 [info     ] FQE_20220420164102: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017321179079454998, 'time_algorithm_update': 0.0050393876641295675, 'loss': 0.35289021381068714, 'time_step': 0.005290000244628551, 'init_value': -9.46754264831543, 'ave_value': -8.94863300391217, 'soft_opc': nan} step=7912




2022-04-20 16:41.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.49 [info     ] FQE_20220420164102: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001724133657854657, 'time_algorithm_update': 0.005057592031567595, 'loss': 0.3607042524692884, 'time_step': 0.005306654198225154, 'init_value': -9.942558288574219, 'ave_value': -9.41621701483791, 'soft_opc': nan} step=8256




2022-04-20 16:41.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.51 [info     ] FQE_20220420164102: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017287356908931287, 'time_algorithm_update': 0.00501747256101564, 'loss': 0.3639384524214493, 'time_step': 0.005268278510071511, 'init_value': -10.175798416137695, 'ave_value': -9.57141477776553, 'soft_opc': nan} step=8600




2022-04-20 16:41.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.53 [info     ] FQE_20220420164102: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017446972603021667, 'time_algorithm_update': 0.005107391019200169, 'loss': 0.3736643012732181, 'time_step': 0.005359707876693371, 'init_value': -10.455199241638184, 'ave_value': -9.816989510910803, 'soft_opc': nan} step=8944




2022-04-20 16:41.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.55 [info     ] FQE_20220420164102: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016873728397280672, 'time_algorithm_update': 0.004659902217776277, 'loss': 0.3773946307469593, 'time_step': 0.004903663729512414, 'init_value': -10.637691497802734, 'ave_value': -9.983608338001881, 'soft_opc': nan} step=9288




2022-04-20 16:41.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.57 [info     ] FQE_20220420164102: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017429922902306846, 'time_algorithm_update': 0.005103056513985922, 'loss': 0.36865135937452664, 'time_step': 0.005356509325116179, 'init_value': -10.358637809753418, 'ave_value': -9.749757381752428, 'soft_opc': nan} step=9632




2022-04-20 16:41.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:41.59 [info     ] FQE_20220420164102: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017076800035875896, 'time_algorithm_update': 0.0050181399944216705, 'loss': 0.3633507320354151, 'time_step': 0.005266138287477715, 'init_value': -10.571773529052734, 'ave_value': -10.001361410360918, 'soft_opc': nan} step=9976




2022-04-20 16:41.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.01 [info     ] FQE_20220420164102: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017444200293962345, 'time_algorithm_update': 0.005160988070244013, 'loss': 0.35794252835127505, 'time_step': 0.005412279173385265, 'init_value': -10.841543197631836, 'ave_value': -10.297642734900185, 'soft_opc': nan} step=10320




2022-04-20 16:42.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.03 [info     ] FQE_20220420164102: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017070700955945393, 'time_algorithm_update': 0.004550065412077793, 'loss': 0.3593885972933454, 'time_step': 0.004798180835191594, 'init_value': -10.91501235961914, 'ave_value': -10.438696588474738, 'soft_opc': nan} step=10664




2022-04-20 16:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.05 [info     ] FQE_20220420164102: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001719788063404172, 'time_algorithm_update': 0.0050698553406915, 'loss': 0.3618775857395904, 'time_step': 0.005319599495377652, 'init_value': -11.234329223632812, 'ave_value': -10.812416175395242, 'soft_opc': nan} step=11008




2022-04-20 16:42.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.07 [info     ] FQE_20220420164102: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017217356105183446, 'time_algorithm_update': 0.0051005503465962965, 'loss': 0.3719737190579952, 'time_step': 0.0053498911303143165, 'init_value': -11.207813262939453, 'ave_value': -10.89328416685473, 'soft_opc': nan} step=11352




2022-04-20 16:42.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.09 [info     ] FQE_20220420164102: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017251108967980674, 'time_algorithm_update': 0.005079643670902695, 'loss': 0.3709026436094021, 'time_step': 0.005329827929652015, 'init_value': -11.723287582397461, 'ave_value': -11.366282831996612, 'soft_opc': nan} step=11696




2022-04-20 16:42.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.11 [info     ] FQE_20220420164102: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017052126485247944, 'time_algorithm_update': 0.005081753398096839, 'loss': 0.37811217501997774, 'time_step': 0.005328456329744916, 'init_value': -12.119409561157227, 'ave_value': -12.013085413973796, 'soft_opc': nan} step=12040




2022-04-20 16:42.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.13 [info     ] FQE_20220420164102: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017173900160678598, 'time_algorithm_update': 0.0045846679876017015, 'loss': 0.3867440711013799, 'time_step': 0.004836267510125804, 'init_value': -12.211725234985352, 'ave_value': -12.082977281109773, 'soft_opc': nan} step=12384




2022-04-20 16:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.15 [info     ] FQE_20220420164102: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017300802607868993, 'time_algorithm_update': 0.005140510409377342, 'loss': 0.38940179312796613, 'time_step': 0.0053908644720565444, 'init_value': -12.254178047180176, 'ave_value': -12.263878490413363, 'soft_opc': nan} step=12728




2022-04-20 16:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.17 [info     ] FQE_20220420164102: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001696389774943507, 'time_algorithm_update': 0.005075697288956753, 'loss': 0.38745345231037326, 'time_step': 0.005322656659192817, 'init_value': -12.234735488891602, 'ave_value': -12.274607278922623, 'soft_opc': nan} step=13072




2022-04-20 16:42.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.19 [info     ] FQE_20220420164102: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017224217570105263, 'time_algorithm_update': 0.0051211978113928506, 'loss': 0.3955648349987906, 'time_step': 0.005370989095333011, 'init_value': -12.34632682800293, 'ave_value': -12.486211715752745, 'soft_opc': nan} step=13416




2022-04-20 16:42.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.21 [info     ] FQE_20220420164102: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016968472059382948, 'time_algorithm_update': 0.004675598338592884, 'loss': 0.3973914699921341, 'time_step': 0.004922694938127385, 'init_value': -12.075884819030762, 'ave_value': -12.387907614713317, 'soft_opc': nan} step=13760




2022-04-20 16:42.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.23 [info     ] FQE_20220420164102: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017256376355193382, 'time_algorithm_update': 0.005115843096444773, 'loss': 0.39874427572304255, 'time_step': 0.005367524402086125, 'init_value': -12.405208587646484, 'ave_value': -12.693497770313208, 'soft_opc': nan} step=14104




2022-04-20 16:42.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.25 [info     ] FQE_20220420164102: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016996472380882086, 'time_algorithm_update': 0.00515351323194282, 'loss': 0.4021197388620051, 'time_step': 0.005399507145548976, 'init_value': -12.14201545715332, 'ave_value': -12.503106350713486, 'soft_opc': nan} step=14448




2022-04-20 16:42.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.27 [info     ] FQE_20220420164102: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017130444216173748, 'time_algorithm_update': 0.005066380944362906, 'loss': 0.40639683183250125, 'time_step': 0.005315784105034762, 'init_value': -12.3955659866333, 'ave_value': -12.888121490717472, 'soft_opc': nan} step=14792




2022-04-20 16:42.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.29 [info     ] FQE_20220420164102: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017288673755734465, 'time_algorithm_update': 0.0050629897173060925, 'loss': 0.4204902190005753, 'time_step': 0.005314612111379934, 'init_value': -12.661293983459473, 'ave_value': -13.135859881609946, 'soft_opc': nan} step=15136




2022-04-20 16:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.30 [info     ] FQE_20220420164102: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001697956129562023, 'time_algorithm_update': 0.004629748505215312, 'loss': 0.4267800377362386, 'time_step': 0.004875167857768924, 'init_value': -12.710248947143555, 'ave_value': -13.309693002861898, 'soft_opc': nan} step=15480




2022-04-20 16:42.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.32 [info     ] FQE_20220420164102: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017321040464002033, 'time_algorithm_update': 0.00510210769121037, 'loss': 0.43570079977981485, 'time_step': 0.005353521469027497, 'init_value': -12.824443817138672, 'ave_value': -13.455830556476439, 'soft_opc': nan} step=15824




2022-04-20 16:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.34 [info     ] FQE_20220420164102: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016981917758320653, 'time_algorithm_update': 0.0050772837428159495, 'loss': 0.44247511055775335, 'time_step': 0.005324394203895746, 'init_value': -13.057180404663086, 'ave_value': -13.791988885241587, 'soft_opc': nan} step=16168




2022-04-20 16:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.36 [info     ] FQE_20220420164102: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017180623010147448, 'time_algorithm_update': 0.005035853663156199, 'loss': 0.447794338539351, 'time_step': 0.0052852256353511365, 'init_value': -12.729157447814941, 'ave_value': -13.68719794970375, 'soft_opc': nan} step=16512




2022-04-20 16:42.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.38 [info     ] FQE_20220420164102: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016555675240450128, 'time_algorithm_update': 0.00463632858076761, 'loss': 0.44645410365977345, 'time_step': 0.004879507214524025, 'init_value': -13.231771469116211, 'ave_value': -14.275154711937045, 'soft_opc': nan} step=16856




2022-04-20 16:42.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:42.40 [info     ] FQE_20220420164102: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017153038535007212, 'time_algorithm_update': 0.0051747172377830325, 'loss': 0.45161556378331813, 'time_step': 0.005421671063400978, 'init_value': -13.276372909545898, 'ave_value': -14.56894198986324, 'soft_opc': nan} step=17200




2022-04-20 16:42.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164102/model_17200.pt
search iteration:  15
using hyper params:  [0.003976210572883172, 0.0018813746160568211, 2.678612896293183e-05, 1]
2022-04-20 16:42.40 [debug    ] RoundIterator is selected.
2022-04-20 16:42.40 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420164240
2022-04-20 16:42.40 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:42.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:42.40 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:42.40 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0039762105728

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.44 [info     ] TD3PlusBC_20220420164240: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00032635181270844753, 'time_algorithm_update': 0.008939943118402136, 'critic_loss': 0.6146313600590703, 'actor_loss': 0.008928804783618937, 'time_step': 0.009346122630158363, 'td_error': 0.8098321564753446, 'init_value': -0.5281380414962769, 'ave_value': 0.15247548971539113} step=342
2022-04-20 16:42.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.48 [info     ] TD3PlusBC_20220420164240: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000326269551327354, 'time_algorithm_update': 0.00842406247791491, 'critic_loss': 0.18419624753949934, 'actor_loss': -0.01243785609714469, 'time_step': 0.008831609759414405, 'td_error': 0.8035074555572823, 'init_value': -0.7456817030906677, 'ave_value': 0.2225771571973104} step=684
2022-04-20 16:42.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.52 [info     ] TD3PlusBC_20220420164240: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00032981933906064395, 'time_algorithm_update': 0.008993515494274117, 'critic_loss': 0.21719804942085033, 'actor_loss': -0.0011286059318230165, 'time_step': 0.009402567880195483, 'td_error': 0.8030113059686974, 'init_value': -1.0427061319351196, 'ave_value': 0.2778167368028825} step=1026
2022-04-20 16:42.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.55 [info     ] TD3PlusBC_20220420164240: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003307771961591397, 'time_algorithm_update': 0.008787009451124404, 'critic_loss': 0.2564384878466004, 'actor_loss': 0.011629776277563028, 'time_step': 0.009198074452361168, 'td_error': 0.8052927261547741, 'init_value': -1.311963677406311, 'ave_value': 0.33388960868325757} step=1368
2022-04-20 16:42.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.59 [info     ] TD3PlusBC_20220420164240: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003229142629612259, 'time_algorithm_update': 0.008491619288572791, 'critic_loss': 0.3142182461655976, 'actor_loss': 0.0006437486810990941, 'time_step': 0.008897193691186737, 'td_error': 0.803827650447278, 'init_value': -1.5589065551757812, 'ave_value': 0.4254215095337346} step=1710
2022-04-20 16:42.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.02 [info     ] TD3PlusBC_20220420164240: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00032963181099696467, 'time_algorithm_update': 0.008846968935247054, 'critic_loss': 0.3746927073170916, 'actor_loss': 0.021094547355907003, 'time_step': 0.00925970635218927, 'td_error': 0.8127551088195268, 'init_value': -1.9519516229629517, 'ave_value': 0.4415106447232333} step=2052
2022-04-20 16:43.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.06 [info     ] TD3PlusBC_20220420164240: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003271542097392835, 'time_algorithm_update': 0.00886434490917719, 'critic_loss': 0.4358365099811763, 'actor_loss': 0.024231804371402973, 'time_step': 0.009272886298553289, 'td_error': 0.8214321562190713, 'init_value': -2.162440061569214, 'ave_value': 0.5660676946245796} step=2394
2022-04-20 16:43.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.10 [info     ] TD3PlusBC_20220420164240: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00032781230078803166, 'time_algorithm_update': 0.008973326599388792, 'critic_loss': 0.5046476216997668, 'actor_loss': 0.042486967028755894, 'time_step': 0.00938346581152308, 'td_error': 0.8347506521977297, 'init_value': -2.4903557300567627, 'ave_value': 0.5936783493332447} step=2736
2022-04-20 16:43.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.13 [info     ] TD3PlusBC_20220420164240: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00032845087218702886, 'time_algorithm_update': 0.008983963414242393, 'critic_loss': 0.5764056136559325, 'actor_loss': 0.04204259923937028, 'time_step': 0.00939628673575775, 'td_error': 0.847497116761302, 'init_value': -2.775860071182251, 'ave_value': 0.6748614227418108} step=3078
2022-04-20 16:43.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.17 [info     ] TD3PlusBC_20220420164240: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00032628140254327426, 'time_algorithm_update': 0.008489366860417595, 'critic_loss': 0.63414465370234, 'actor_loss': 0.035975660823764855, 'time_step': 0.008897911735445435, 'td_error': 0.8631541014471869, 'init_value': -3.029587507247925, 'ave_value': 0.7953230301253413} step=3420
2022-04-20 16:43.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.21 [info     ] TD3PlusBC_20220420164240: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032582687355621514, 'time_algorithm_update': 0.008912059298732825, 'critic_loss': 0.702962346200706, 'actor_loss': 0.059840427395718834, 'time_step': 0.009318134938067163, 'td_error': 0.8892584730069306, 'init_value': -3.4007010459899902, 'ave_value': 0.7579057736393182} step=3762
2022-04-20 16:43.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.24 [info     ] TD3PlusBC_20220420164240: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003292246868735866, 'time_algorithm_update': 0.008827394909328885, 'critic_loss': 0.7570394764256756, 'actor_loss': 0.043386084664809074, 'time_step': 0.009237227384109942, 'td_error': 0.9099839979319728, 'init_value': -3.624840497970581, 'ave_value': 0.9001586213770432} step=4104
2022-04-20 16:43.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.28 [info     ] TD3PlusBC_20220420164240: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00032580735390646415, 'time_algorithm_update': 0.008716872561047648, 'critic_loss': 0.8428838494402623, 'actor_loss': 0.04850439446275694, 'time_step': 0.009124060820417794, 'td_error': 0.9358424581411148, 'init_value': -4.018935203552246, 'ave_value': 0.8846044715294229} step=4446
2022-04-20 16:43.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.32 [info     ] TD3PlusBC_20220420164240: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00032541417239005107, 'time_algorithm_update': 0.00888604169700578, 'critic_loss': 0.9270446961037597, 'actor_loss': 0.04720152910167014, 'time_step': 0.009291539415281418, 'td_error': 0.9628035496841272, 'init_value': -4.363746166229248, 'ave_value': 0.9081463754804687} step=4788
2022-04-20 16:43.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.35 [info     ] TD3PlusBC_20220420164240: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003281678372656393, 'time_algorithm_update': 0.008444236035932574, 'critic_loss': 0.9965443767563641, 'actor_loss': 0.055282433718791484, 'time_step': 0.008854601815430045, 'td_error': 0.9900445926115462, 'init_value': -4.6577324867248535, 'ave_value': 0.9849808141478413} step=5130
2022-04-20 16:43.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.39 [info     ] TD3PlusBC_20220420164240: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003193010363662452, 'time_algorithm_update': 0.008868234199390077, 'critic_loss': 1.095869063677495, 'actor_loss': 0.04669174556320871, 'time_step': 0.009269377641510545, 'td_error': 1.0239488547282152, 'init_value': -5.003471851348877, 'ave_value': 1.0555876487338294} step=5472
2022-04-20 16:43.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.43 [info     ] TD3PlusBC_20220420164240: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003280925471880283, 'time_algorithm_update': 0.0088656150806717, 'critic_loss': 1.1894747655381237, 'actor_loss': 0.0652944698755504, 'time_step': 0.009276446543241801, 'td_error': 1.0555446603031817, 'init_value': -5.310530185699463, 'ave_value': 1.0959922057675902} step=5814
2022-04-20 16:43.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.46 [info     ] TD3PlusBC_20220420164240: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003208193862647341, 'time_algorithm_update': 0.008566440894589787, 'critic_loss': 1.3014707689919667, 'actor_loss': 0.027029180539804593, 'time_step': 0.008967330581263491, 'td_error': 1.0847698402880073, 'init_value': -5.556632995605469, 'ave_value': 1.1550884352475068} step=6156
2022-04-20 16:43.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.50 [info     ] TD3PlusBC_20220420164240: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00033067890078003644, 'time_algorithm_update': 0.008963830986915276, 'critic_loss': 1.398555562858693, 'actor_loss': 0.07326748008616486, 'time_step': 0.009373434802942109, 'td_error': 1.117461342892528, 'init_value': -5.988058567047119, 'ave_value': 1.1653894918868402} step=6498
2022-04-20 16:43.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.54 [info     ] TD3PlusBC_20220420164240: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00032543369203980206, 'time_algorithm_update': 0.00836952178798921, 'critic_loss': 1.5524549673785244, 'actor_loss': 0.06560555198474934, 'time_step': 0.00877518612041808, 'td_error': 1.1548556563585541, 'init_value': -6.078671932220459, 'ave_value': 1.3367016991018947} step=6840
2022-04-20 16:43.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.57 [info     ] TD3PlusBC_20220420164240: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00032795381824872645, 'time_algorithm_update': 0.009005833787527697, 'critic_loss': 1.657831388433077, 'actor_loss': 0.056178377973928786, 'time_step': 0.00941548152276647, 'td_error': 1.195879415384104, 'init_value': -6.550449371337891, 'ave_value': 1.2729280063264945} step=7182
2022-04-20 16:43.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.01 [info     ] TD3PlusBC_20220420164240: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003278339118288274, 'time_algorithm_update': 0.008968544982330144, 'critic_loss': 1.8736498039542584, 'actor_loss': 0.07061730103011717, 'time_step': 0.009378483420924136, 'td_error': 1.2269094582142612, 'init_value': -6.864640712738037, 'ave_value': 1.3614384376764432} step=7524
2022-04-20 16:44.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.05 [info     ] TD3PlusBC_20220420164240: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032917728200990555, 'time_algorithm_update': 0.008583185965554756, 'critic_loss': 2.006821447384288, 'actor_loss': 0.0613494671619775, 'time_step': 0.008993356548554717, 'td_error': 1.272734587213594, 'init_value': -7.242990016937256, 'ave_value': 1.3511942546941373} step=7866
2022-04-20 16:44.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.08 [info     ] TD3PlusBC_20220420164240: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00032967573020890444, 'time_algorithm_update': 0.008945635884825946, 'critic_loss': 2.2167956939740487, 'actor_loss': 0.06012057628460795, 'time_step': 0.009358622874432837, 'td_error': 1.3067597701874176, 'init_value': -7.617732048034668, 'ave_value': 1.4065741580062723} step=8208
2022-04-20 16:44.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.12 [info     ] TD3PlusBC_20220420164240: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032818247700295254, 'time_algorithm_update': 0.008434715326766522, 'critic_loss': 2.3927246875058836, 'actor_loss': 0.06870964600851662, 'time_step': 0.008844362364874946, 'td_error': 1.3139557489563867, 'init_value': -8.08899974822998, 'ave_value': 1.474545032030584} step=8550
2022-04-20 16:44.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.16 [info     ] TD3PlusBC_20220420164240: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032905249567756874, 'time_algorithm_update': 0.008986652245995595, 'critic_loss': 2.6159288544229597, 'actor_loss': 0.06604729603692802, 'time_step': 0.009396564193636353, 'td_error': 1.3937289314450634, 'init_value': -8.279593467712402, 'ave_value': 1.478624379304028} step=8892
2022-04-20 16:44.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.19 [info     ] TD3PlusBC_20220420164240: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00032748464952435407, 'time_algorithm_update': 0.008848528415836089, 'critic_loss': 2.858766573040109, 'actor_loss': 0.07422954578235832, 'time_step': 0.00925767491435447, 'td_error': 1.3998605838797125, 'init_value': -8.869911193847656, 'ave_value': 1.4162328637915722} step=9234
2022-04-20 16:44.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.23 [info     ] TD3PlusBC_20220420164240: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003282738010785733, 'time_algorithm_update': 0.00846824171947457, 'critic_loss': 3.0549652694958693, 'actor_loss': 0.09088034195858136, 'time_step': 0.008877433531465586, 'td_error': 1.4583598302193026, 'init_value': -9.020820617675781, 'ave_value': 1.5721968359383545} step=9576
2022-04-20 16:44.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.27 [info     ] TD3PlusBC_20220420164240: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000327166758085552, 'time_algorithm_update': 0.00889371501074897, 'critic_loss': 3.2714661988947125, 'actor_loss': 0.08150432537696516, 'time_step': 0.009302352604113127, 'td_error': 1.4954006886297482, 'init_value': -9.408730506896973, 'ave_value': 1.550778754623481} step=9918
2022-04-20 16:44.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.30 [info     ] TD3PlusBC_20220420164240: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003281155524895205, 'time_algorithm_update': 0.008869695384600008, 'critic_loss': 3.51470410283546, 'actor_loss': 0.07149919915443276, 'time_step': 0.009277695103695518, 'td_error': 1.5257282903874532, 'init_value': -9.775272369384766, 'ave_value': 1.6559217188976336} step=10260
2022-04-20 16:44.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.34 [info     ] TD3PlusBC_20220420164240: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003256177344517401, 'time_algorithm_update': 0.009025869313736407, 'critic_loss': 3.7777723982843043, 'actor_loss': 0.0860113243392685, 'time_step': 0.009432964854770236, 'td_error': 1.5658349228973334, 'init_value': -10.095739364624023, 'ave_value': 1.6531763295494104} step=10602
2022-04-20 16:44.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.38 [info     ] TD3PlusBC_20220420164240: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003283365428099158, 'time_algorithm_update': 0.008902726814760798, 'critic_loss': 3.990427151409506, 'actor_loss': 0.080768153441754, 'time_step': 0.009308810122528969, 'td_error': 1.6130008922616357, 'init_value': -10.464754104614258, 'ave_value': 1.7262566182154089} step=10944
2022-04-20 16:44.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.41 [info     ] TD3PlusBC_20220420164240: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00032535979622288754, 'time_algorithm_update': 0.008551450500711363, 'critic_loss': 4.34413828435, 'actor_loss': 0.09507330685679675, 'time_step': 0.00895731909233227, 'td_error': 1.6453733232651848, 'init_value': -10.834911346435547, 'ave_value': 1.7260991656901903} step=11286
2022-04-20 16:44.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.45 [info     ] TD3PlusBC_20220420164240: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00032532493970547505, 'time_algorithm_update': 0.009001032650819299, 'critic_loss': 4.545538132127962, 'actor_loss': 0.08007886977485049, 'time_step': 0.009408036170647158, 'td_error': 1.687021326584902, 'init_value': -11.226049423217773, 'ave_value': 1.7812138708925864} step=11628
2022-04-20 16:44.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.49 [info     ] TD3PlusBC_20220420164240: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00033377694804766027, 'time_algorithm_update': 0.008973438140244512, 'critic_loss': 4.823390520455544, 'actor_loss': 0.08742374920269899, 'time_step': 0.009388982203968783, 'td_error': 1.7482621934116902, 'init_value': -11.51664924621582, 'ave_value': 1.8336243836950041} step=11970
2022-04-20 16:44.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.52 [info     ] TD3PlusBC_20220420164240: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003306733237372504, 'time_algorithm_update': 0.008775065516867834, 'critic_loss': 5.088301382218188, 'actor_loss': 0.09509617933317234, 'time_step': 0.009186590624134443, 'td_error': 1.771540634516021, 'init_value': -12.059969902038574, 'ave_value': 1.8253503283985832} step=12312
2022-04-20 16:44.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.56 [info     ] TD3PlusBC_20220420164240: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003245134799801118, 'time_algorithm_update': 0.008827349595856248, 'critic_loss': 5.394752218186507, 'actor_loss': 0.08951628005556893, 'time_step': 0.009234415160285102, 'td_error': 1.827919570375012, 'init_value': -12.307205200195312, 'ave_value': 1.9080880307444783} step=12654
2022-04-20 16:44.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.00 [info     ] TD3PlusBC_20220420164240: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003286418859024494, 'time_algorithm_update': 0.008551704953288474, 'critic_loss': 5.675929063593435, 'actor_loss': 0.09123444180303847, 'time_step': 0.008961361054091425, 'td_error': 1.8776389325815057, 'init_value': -12.727765083312988, 'ave_value': 1.8495078769724749} step=12996
2022-04-20 16:45.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.03 [info     ] TD3PlusBC_20220420164240: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00032384702336718464, 'time_algorithm_update': 0.008914080279612402, 'critic_loss': 5.914758079581791, 'actor_loss': 0.09845285198231887, 'time_step': 0.00931930332853083, 'td_error': 1.9016204790208489, 'init_value': -13.113409042358398, 'ave_value': 1.98801678572773} step=13338
2022-04-20 16:45.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.07 [info     ] TD3PlusBC_20220420164240: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032715699826067654, 'time_algorithm_update': 0.008852266428763406, 'critic_loss': 6.228381125550521, 'actor_loss': 0.09728978609132488, 'time_step': 0.009259480481956437, 'td_error': 1.974735032732544, 'init_value': -13.556674003601074, 'ave_value': 1.8874764406236375} step=13680
2022-04-20 16:45.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.11 [info     ] TD3PlusBC_20220420164240: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00033019439518800256, 'time_algorithm_update': 0.00851990604958339, 'critic_loss': 6.547744276927926, 'actor_loss': 0.08231101063085579, 'time_step': 0.008930413346541556, 'td_error': 2.043449670234998, 'init_value': -13.840612411499023, 'ave_value': 1.9961923502066785} step=14022
2022-04-20 16:45.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.15 [info     ] TD3PlusBC_20220420164240: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003266815553631699, 'time_algorithm_update': 0.00897083366126345, 'critic_loss': 6.799949532015281, 'actor_loss': 0.10437105410890272, 'time_step': 0.00937851409465946, 'td_error': 2.0682914088321676, 'init_value': -14.147369384765625, 'ave_value': 2.0525437870625876} step=14364
2022-04-20 16:45.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.18 [info     ] TD3PlusBC_20220420164240: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003243949678209093, 'time_algorithm_update': 0.0085950009306969, 'critic_loss': 7.1313897747742505, 'actor_loss': 0.092474988176983, 'time_step': 0.008999340715464096, 'td_error': 2.1452500296171824, 'init_value': -14.536001205444336, 'ave_value': 2.0518170181462154} step=14706
2022-04-20 16:45.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.22 [info     ] TD3PlusBC_20220420164240: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000329866743924325, 'time_algorithm_update': 0.008958067810326292, 'critic_loss': 7.382432890042924, 'actor_loss': 0.09323189987076654, 'time_step': 0.00937209212989138, 'td_error': 2.1986792027268662, 'init_value': -14.98145866394043, 'ave_value': 2.0304502039790355} step=15048
2022-04-20 16:45.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.26 [info     ] TD3PlusBC_20220420164240: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003288419623123972, 'time_algorithm_update': 0.008934506198816132, 'critic_loss': 7.715303055724205, 'actor_loss': 0.12055502953933693, 'time_step': 0.009343898784347444, 'td_error': 2.230677145459314, 'init_value': -15.581820487976074, 'ave_value': 2.0158040575820717} step=15390
2022-04-20 16:45.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.29 [info     ] TD3PlusBC_20220420164240: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003246117753592151, 'time_algorithm_update': 0.00841601131952297, 'critic_loss': 8.033238319277066, 'actor_loss': 0.09270859812522492, 'time_step': 0.008821338937993636, 'td_error': 2.310936246023549, 'init_value': -15.548344612121582, 'ave_value': 2.1396619830597694} step=15732
2022-04-20 16:45.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.33 [info     ] TD3PlusBC_20220420164240: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00032470867647762185, 'time_algorithm_update': 0.008968107184471443, 'critic_loss': 8.296682410769993, 'actor_loss': 0.12442392023683291, 'time_step': 0.009374726585477416, 'td_error': 2.374762998146354, 'init_value': -15.983640670776367, 'ave_value': 2.1265706128236914} step=16074
2022-04-20 16:45.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.37 [info     ] TD3PlusBC_20220420164240: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003305966393989429, 'time_algorithm_update': 0.00858534776676468, 'critic_loss': 8.554623627523233, 'actor_loss': 0.08700085076236586, 'time_step': 0.008995708666349711, 'td_error': 2.430217311064622, 'init_value': -16.52853012084961, 'ave_value': 2.112591318773391} step=16416
2022-04-20 16:45.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.40 [info     ] TD3PlusBC_20220420164240: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033023134309645983, 'time_algorithm_update': 0.008898725286561843, 'critic_loss': 8.90254741244846, 'actor_loss': 0.10184185609918589, 'time_step': 0.009310136761581688, 'td_error': 2.486779559578996, 'init_value': -16.67155647277832, 'ave_value': 2.18062978070632} step=16758
2022-04-20 16:45.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.44 [info     ] TD3PlusBC_20220420164240: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003315273084138569, 'time_algorithm_update': 0.008865467289037872, 'critic_loss': 9.307418570183872, 'actor_loss': 0.08657290819182731, 'time_step': 0.009279400284527338, 'td_error': 2.5473576612686886, 'init_value': -17.232463836669922, 'ave_value': 2.2283456499351875} step=17100
2022-04-20 16:45.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164240/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:45.45 [info     ] FQE_20220420164544: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016066252467143968, 'time_algorithm_update': 0.004524999354259077, 'loss': 0.008311116349804833, 'time_step': 0.004759176667914333, 'init_value': -0.12820769846439362, 'ave_value': -0.026605681968466924, 'soft_opc': nan} step=166




2022-04-20 16:45.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.46 [info     ] FQE_20220420164544: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016679390367255154, 'time_algorithm_update': 0.004815699106239411, 'loss': 0.005809495022172579, 'time_step': 0.0050566699131425604, 'init_value': -0.2513403594493866, 'ave_value': -0.08631657206721872, 'soft_opc': nan} step=332




2022-04-20 16:45.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.47 [info     ] FQE_20220420164544: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016442408044654202, 'time_algorithm_update': 0.005131055073565747, 'loss': 0.005225983915390469, 'time_step': 0.0053639354476009505, 'init_value': -0.31186264753341675, 'ave_value': -0.11537951814128203, 'soft_opc': nan} step=498




2022-04-20 16:45.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.48 [info     ] FQE_20220420164544: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016295622630291674, 'time_algorithm_update': 0.005028635622507118, 'loss': 0.005257982912147979, 'time_step': 0.005263819751969303, 'init_value': -0.43996500968933105, 'ave_value': -0.20406839275241803, 'soft_opc': nan} step=664




2022-04-20 16:45.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.49 [info     ] FQE_20220420164544: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016693896557911332, 'time_algorithm_update': 0.005049067807484822, 'loss': 0.005078769217424813, 'time_step': 0.005290593009397208, 'init_value': -0.4702167510986328, 'ave_value': -0.19423260634433612, 'soft_opc': nan} step=830




2022-04-20 16:45.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.50 [info     ] FQE_20220420164544: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016485495739672557, 'time_algorithm_update': 0.005118816731924034, 'loss': 0.004732122912677566, 'time_step': 0.005357011255011501, 'init_value': -0.5106267929077148, 'ave_value': -0.2207242106454581, 'soft_opc': nan} step=996




2022-04-20 16:45.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.51 [info     ] FQE_20220420164544: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016971812190779722, 'time_algorithm_update': 0.004997260599251253, 'loss': 0.004633318827801709, 'time_step': 0.005239196570522814, 'init_value': -0.5318472385406494, 'ave_value': -0.21011757926243516, 'soft_opc': nan} step=1162




2022-04-20 16:45.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.52 [info     ] FQE_20220420164544: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016700503337814147, 'time_algorithm_update': 0.005018511450434306, 'loss': 0.0043763299199124145, 'time_step': 0.005264578095401626, 'init_value': -0.5736607909202576, 'ave_value': -0.22880816772865417, 'soft_opc': nan} step=1328




2022-04-20 16:45.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.53 [info     ] FQE_20220420164544: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016992925161338714, 'time_algorithm_update': 0.005059496465935765, 'loss': 0.004174035311822432, 'time_step': 0.005304080894194454, 'init_value': -0.5916769504547119, 'ave_value': -0.22999605202527196, 'soft_opc': nan} step=1494




2022-04-20 16:45.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.54 [info     ] FQE_20220420164544: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016639606062188205, 'time_algorithm_update': 0.004862832735819989, 'loss': 0.004276821504807077, 'time_step': 0.005099871072424464, 'init_value': -0.6020375490188599, 'ave_value': -0.220102529796176, 'soft_opc': nan} step=1660




2022-04-20 16:45.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.54 [info     ] FQE_20220420164544: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001642747097704784, 'time_algorithm_update': 0.004458045384970056, 'loss': 0.004424836837927845, 'time_step': 0.0046934866043458505, 'init_value': -0.64814293384552, 'ave_value': -0.2554739006492939, 'soft_opc': nan} step=1826




2022-04-20 16:45.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.55 [info     ] FQE_20220420164544: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001694868846111987, 'time_algorithm_update': 0.0051120505275496515, 'loss': 0.00426678006826193, 'time_step': 0.005356392228459737, 'init_value': -0.6870717406272888, 'ave_value': -0.2700540825128891, 'soft_opc': nan} step=1992




2022-04-20 16:45.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.56 [info     ] FQE_20220420164544: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016533610332443053, 'time_algorithm_update': 0.005097695143826036, 'loss': 0.004907893910386925, 'time_step': 0.005339711545461632, 'init_value': -0.7299542427062988, 'ave_value': -0.3036678230062731, 'soft_opc': nan} step=2158




2022-04-20 16:45.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.57 [info     ] FQE_20220420164544: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016993068786988775, 'time_algorithm_update': 0.0050314233963748055, 'loss': 0.00464614878158949, 'time_step': 0.0052813334637377635, 'init_value': -0.7769166827201843, 'ave_value': -0.3367766526192091, 'soft_opc': nan} step=2324




2022-04-20 16:45.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.58 [info     ] FQE_20220420164544: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016927431864910815, 'time_algorithm_update': 0.0050740184554134505, 'loss': 0.004931233598183033, 'time_step': 0.0053164140287652074, 'init_value': -0.7784784436225891, 'ave_value': -0.3301988532280063, 'soft_opc': nan} step=2490




2022-04-20 16:45.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:45.59 [info     ] FQE_20220420164544: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001648822462702372, 'time_algorithm_update': 0.0051415067121207, 'loss': 0.004964503362152097, 'time_step': 0.005382211811571236, 'init_value': -0.7823476195335388, 'ave_value': -0.33667305983384854, 'soft_opc': nan} step=2656




2022-04-20 16:45.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.00 [info     ] FQE_20220420164544: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016754650207887213, 'time_algorithm_update': 0.00502549309328378, 'loss': 0.005299675845013684, 'time_step': 0.005269909479531897, 'init_value': -0.8135044574737549, 'ave_value': -0.35848964526074695, 'soft_opc': nan} step=2822




2022-04-20 16:46.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.01 [info     ] FQE_20220420164544: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00017088148967329278, 'time_algorithm_update': 0.0050556329359491186, 'loss': 0.005715740231323022, 'time_step': 0.00530071718147002, 'init_value': -0.8534818887710571, 'ave_value': -0.3914851355517434, 'soft_opc': nan} step=2988




2022-04-20 16:46.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.02 [info     ] FQE_20220420164544: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016595943864569607, 'time_algorithm_update': 0.00507678755794663, 'loss': 0.006275289859594665, 'time_step': 0.0053184635667915805, 'init_value': -0.9352998733520508, 'ave_value': -0.47004854692699943, 'soft_opc': nan} step=3154




2022-04-20 16:46.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.03 [info     ] FQE_20220420164544: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001626977001328066, 'time_algorithm_update': 0.004258412912667516, 'loss': 0.007191433833564171, 'time_step': 0.004494194524833955, 'init_value': -0.9661831259727478, 'ave_value': -0.5039467040038257, 'soft_opc': nan} step=3320




2022-04-20 16:46.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.04 [info     ] FQE_20220420164544: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016323773257703665, 'time_algorithm_update': 0.004998398114399737, 'loss': 0.007375274772239647, 'time_step': 0.0052359592483704346, 'init_value': -1.0262954235076904, 'ave_value': -0.5361887796370832, 'soft_opc': nan} step=3486




2022-04-20 16:46.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.05 [info     ] FQE_20220420164544: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016702657722565066, 'time_algorithm_update': 0.005067235015961061, 'loss': 0.007760059761012489, 'time_step': 0.005310858588620841, 'init_value': -1.0604662895202637, 'ave_value': -0.558872889296932, 'soft_opc': nan} step=3652




2022-04-20 16:46.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.06 [info     ] FQE_20220420164544: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017292097390416157, 'time_algorithm_update': 0.005164452345974474, 'loss': 0.008818332804717595, 'time_step': 0.005412064402936453, 'init_value': -1.1881630420684814, 'ave_value': -0.6939536477510423, 'soft_opc': nan} step=3818




2022-04-20 16:46.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.07 [info     ] FQE_20220420164544: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017057269452566123, 'time_algorithm_update': 0.004988164786832878, 'loss': 0.008989641432375479, 'time_step': 0.005235802696411868, 'init_value': -1.1705255508422852, 'ave_value': -0.6736517636455713, 'soft_opc': nan} step=3984




2022-04-20 16:46.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.08 [info     ] FQE_20220420164544: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001696822154952819, 'time_algorithm_update': 0.005030143691832761, 'loss': 0.00988274830511311, 'time_step': 0.005274246974163745, 'init_value': -1.2533595561981201, 'ave_value': -0.7317532012335587, 'soft_opc': nan} step=4150




2022-04-20 16:46.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.09 [info     ] FQE_20220420164544: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017324844038630105, 'time_algorithm_update': 0.005015087414936847, 'loss': 0.01079119885402047, 'time_step': 0.005263768046735281, 'init_value': -1.35406494140625, 'ave_value': -0.8032286483312009, 'soft_opc': nan} step=4316




2022-04-20 16:46.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.10 [info     ] FQE_20220420164544: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016943517937717667, 'time_algorithm_update': 0.005021593656884618, 'loss': 0.011044795408210975, 'time_step': 0.005268071071211114, 'init_value': -1.368584156036377, 'ave_value': -0.7908113404735084, 'soft_opc': nan} step=4482




2022-04-20 16:46.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.11 [info     ] FQE_20220420164544: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016956587871873235, 'time_algorithm_update': 0.005045343594378735, 'loss': 0.011765702373028385, 'time_step': 0.005290590136884207, 'init_value': -1.4486792087554932, 'ave_value': -0.8623758504167199, 'soft_opc': nan} step=4648




2022-04-20 16:46.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.12 [info     ] FQE_20220420164544: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016431492495249552, 'time_algorithm_update': 0.004099374794098268, 'loss': 0.012171757763044078, 'time_step': 0.004336154604532632, 'init_value': -1.4971997737884521, 'ave_value': -0.8963530361769838, 'soft_opc': nan} step=4814




2022-04-20 16:46.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.13 [info     ] FQE_20220420164544: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001700470246464373, 'time_algorithm_update': 0.005137881600713155, 'loss': 0.012665504989416215, 'time_step': 0.005382063877151673, 'init_value': -1.5352632999420166, 'ave_value': -0.9393021110847996, 'soft_opc': nan} step=4980




2022-04-20 16:46.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.14 [info     ] FQE_20220420164544: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001683594232582184, 'time_algorithm_update': 0.0050057919628648875, 'loss': 0.013271467201271465, 'time_step': 0.0052486485745533405, 'init_value': -1.556279182434082, 'ave_value': -0.9592116636593264, 'soft_opc': nan} step=5146




2022-04-20 16:46.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.14 [info     ] FQE_20220420164544: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001710423504013613, 'time_algorithm_update': 0.005005852285637913, 'loss': 0.013515201481395817, 'time_step': 0.0052504510764616085, 'init_value': -1.6561833620071411, 'ave_value': -1.0289962010491673, 'soft_opc': nan} step=5312




2022-04-20 16:46.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.15 [info     ] FQE_20220420164544: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.000167287975908762, 'time_algorithm_update': 0.005058788391480963, 'loss': 0.014792561058392635, 'time_step': 0.005304243191179022, 'init_value': -1.6742448806762695, 'ave_value': -1.0179445265538505, 'soft_opc': nan} step=5478




2022-04-20 16:46.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.16 [info     ] FQE_20220420164544: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00017237376017742846, 'time_algorithm_update': 0.005012276660965149, 'loss': 0.01504006698104856, 'time_step': 0.005263186362852533, 'init_value': -1.6959271430969238, 'ave_value': -1.0132030882698542, 'soft_opc': nan} step=5644




2022-04-20 16:46.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.17 [info     ] FQE_20220420164544: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016273647905832314, 'time_algorithm_update': 0.005012683121554823, 'loss': 0.015143700239265406, 'time_step': 0.005249714276876794, 'init_value': -1.783766508102417, 'ave_value': -1.0708529879116697, 'soft_opc': nan} step=5810




2022-04-20 16:46.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.18 [info     ] FQE_20220420164544: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016707397369017084, 'time_algorithm_update': 0.005128524389611669, 'loss': 0.016178193908179157, 'time_step': 0.005370637020432806, 'init_value': -1.8805487155914307, 'ave_value': -1.1466546142968843, 'soft_opc': nan} step=5976




2022-04-20 16:46.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.19 [info     ] FQE_20220420164544: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016561617334204983, 'time_algorithm_update': 0.004976394664810364, 'loss': 0.01727880710414723, 'time_step': 0.005216907305889819, 'init_value': -1.8411858081817627, 'ave_value': -1.086100995173963, 'soft_opc': nan} step=6142




2022-04-20 16:46.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.20 [info     ] FQE_20220420164544: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016373324106974774, 'time_algorithm_update': 0.0043694484664733154, 'loss': 0.016957803160170967, 'time_step': 0.0046055948877909095, 'init_value': -1.9441442489624023, 'ave_value': -1.1706704038044287, 'soft_opc': nan} step=6308




2022-04-20 16:46.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.21 [info     ] FQE_20220420164544: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001680664269320936, 'time_algorithm_update': 0.0049507761576089515, 'loss': 0.01801135067208053, 'time_step': 0.005192129008741264, 'init_value': -2.0604023933410645, 'ave_value': -1.27101143410572, 'soft_opc': nan} step=6474




2022-04-20 16:46.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.22 [info     ] FQE_20220420164544: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016831489930669945, 'time_algorithm_update': 0.005093643464237811, 'loss': 0.018436254040725214, 'time_step': 0.005337363266083132, 'init_value': -2.0472664833068848, 'ave_value': -1.2402800163207994, 'soft_opc': nan} step=6640




2022-04-20 16:46.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.23 [info     ] FQE_20220420164544: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00017345095255288733, 'time_algorithm_update': 0.004985081144126065, 'loss': 0.019045612732711506, 'time_step': 0.005233592297657427, 'init_value': -2.20763897895813, 'ave_value': -1.3546815715642093, 'soft_opc': nan} step=6806




2022-04-20 16:46.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.24 [info     ] FQE_20220420164544: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017023086547851562, 'time_algorithm_update': 0.004978096628763589, 'loss': 0.02001829552239772, 'time_step': 0.005223916237612805, 'init_value': -2.3073840141296387, 'ave_value': -1.4271572645575328, 'soft_opc': nan} step=6972




2022-04-20 16:46.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.25 [info     ] FQE_20220420164544: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016950842845870788, 'time_algorithm_update': 0.005083740475666092, 'loss': 0.020799097896087915, 'time_step': 0.005330918783164886, 'init_value': -2.3484182357788086, 'ave_value': -1.424319865922066, 'soft_opc': nan} step=7138




2022-04-20 16:46.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.26 [info     ] FQE_20220420164544: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016693034804010965, 'time_algorithm_update': 0.005213290812021278, 'loss': 0.021837861821369594, 'time_step': 0.005458412400211196, 'init_value': -2.440798759460449, 'ave_value': -1.4930579895296276, 'soft_opc': nan} step=7304




2022-04-20 16:46.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.27 [info     ] FQE_20220420164544: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016723770693124058, 'time_algorithm_update': 0.0050534124833991724, 'loss': 0.022239921635566616, 'time_step': 0.005294690649193454, 'init_value': -2.3992857933044434, 'ave_value': -1.4300967958646718, 'soft_opc': nan} step=7470




2022-04-20 16:46.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.28 [info     ] FQE_20220420164544: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001665712839149567, 'time_algorithm_update': 0.005021922559623259, 'loss': 0.02299008776955531, 'time_step': 0.005262293011309153, 'init_value': -2.5482542514801025, 'ave_value': -1.5261645547214997, 'soft_opc': nan} step=7636




2022-04-20 16:46.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.29 [info     ] FQE_20220420164544: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001673727150422981, 'time_algorithm_update': 0.0048260616968913254, 'loss': 0.024423818812980485, 'time_step': 0.005064193024692765, 'init_value': -2.690119743347168, 'ave_value': -1.6400210720902257, 'soft_opc': nan} step=7802




2022-04-20 16:46.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.30 [info     ] FQE_20220420164544: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016611168183476092, 'time_algorithm_update': 0.004548048398580896, 'loss': 0.024507423702217864, 'time_step': 0.004789052239383559, 'init_value': -2.7608189582824707, 'ave_value': -1.695326158884275, 'soft_opc': nan} step=7968




2022-04-20 16:46.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.31 [info     ] FQE_20220420164544: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017221002693635872, 'time_algorithm_update': 0.005027669021882206, 'loss': 0.025181620114578317, 'time_step': 0.005275645887995341, 'init_value': -2.8289990425109863, 'ave_value': -1.7351772573617128, 'soft_opc': nan} step=8134




2022-04-20 16:46.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:46.32 [info     ] FQE_20220420164544: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016961471143975314, 'time_algorithm_update': 0.005065627844936876, 'loss': 0.025896482906691415, 'time_step': 0.005311477615172605, 'init_value': -2.822957754135132, 'ave_value': -1.7445660292233869, 'soft_opc': nan} step=8300




2022-04-20 16:46.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164544/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:46.32 [info     ] Directory is created at d3rlpy_logs/FQE_20220420164632
2022-04-20 16:46.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:46.32 [debug    ] Building models...
2022-04-20 16:46.32 [debug    ] Models have been built.
2022-04-20 16:46.32 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420164632/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:46.34 [info     ] FQE_20220420164632: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016960155132204988, 'time_algorithm_update': 0.005038596862970397, 'loss': 0.02963637628018596, 'time_step': 0.0052828054095423495, 'init_value': -1.076361894607544, 'ave_value': -1.074861643913093, 'soft_opc': nan} step=344




2022-04-20 16:46.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.36 [info     ] FQE_20220420164632: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001710216666376868, 'time_algorithm_update': 0.004979918169420819, 'loss': 0.023733713488137825, 'time_step': 0.005229403806287189, 'init_value': -1.7537354230880737, 'ave_value': -1.7708493866227768, 'soft_opc': nan} step=688




2022-04-20 16:46.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.38 [info     ] FQE_20220420164632: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016930699348449707, 'time_algorithm_update': 0.0048104684020197665, 'loss': 0.02623962430110158, 'time_step': 0.005057289156802865, 'init_value': -2.6002535820007324, 'ave_value': -2.6607856152428164, 'soft_opc': nan} step=1032




2022-04-20 16:46.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.40 [info     ] FQE_20220420164632: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016973392907963244, 'time_algorithm_update': 0.005049519067586854, 'loss': 0.030321986178954152, 'time_step': 0.005292719879815745, 'init_value': -3.0421085357666016, 'ave_value': -3.175962335703609, 'soft_opc': nan} step=1376




2022-04-20 16:46.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.42 [info     ] FQE_20220420164632: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001704672048258227, 'time_algorithm_update': 0.005035579204559326, 'loss': 0.038973483668510306, 'time_step': 0.00528414859328159, 'init_value': -3.6507644653320312, 'ave_value': -3.8790792519981796, 'soft_opc': nan} step=1720




2022-04-20 16:46.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.44 [info     ] FQE_20220420164632: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017291168833887853, 'time_algorithm_update': 0.005098751118016797, 'loss': 0.04743584291468006, 'time_step': 0.005346817332644796, 'init_value': -4.072735786437988, 'ave_value': -4.361322084190072, 'soft_opc': nan} step=2064




2022-04-20 16:46.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.46 [info     ] FQE_20220420164632: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001715830592221992, 'time_algorithm_update': 0.004968232886735783, 'loss': 0.057230534716942456, 'time_step': 0.00521367025929828, 'init_value': -4.718719959259033, 'ave_value': -5.087619811867003, 'soft_opc': nan} step=2408




2022-04-20 16:46.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.47 [info     ] FQE_20220420164632: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016909976338231288, 'time_algorithm_update': 0.004584061544994975, 'loss': 0.07251160236097179, 'time_step': 0.0048279485037160475, 'init_value': -5.0381083488464355, 'ave_value': -5.485610152778492, 'soft_opc': nan} step=2752




2022-04-20 16:46.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.49 [info     ] FQE_20220420164632: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016779955043349156, 'time_algorithm_update': 0.0049806590690169225, 'loss': 0.08357798818737096, 'time_step': 0.0052253201950428095, 'init_value': -5.4370293617248535, 'ave_value': -5.940027286793286, 'soft_opc': nan} step=3096




2022-04-20 16:46.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.51 [info     ] FQE_20220420164632: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017160662384920343, 'time_algorithm_update': 0.005074602919955587, 'loss': 0.10189102789073057, 'time_step': 0.005324698464815007, 'init_value': -5.688657283782959, 'ave_value': -6.285589953526007, 'soft_opc': nan} step=3440




2022-04-20 16:46.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.53 [info     ] FQE_20220420164632: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016769420268923738, 'time_algorithm_update': 0.005041706007580424, 'loss': 0.1148136051285059, 'time_step': 0.005286117625790973, 'init_value': -5.78542423248291, 'ave_value': -6.492925328433489, 'soft_opc': nan} step=3784




2022-04-20 16:46.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.55 [info     ] FQE_20220420164632: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017526537873024164, 'time_algorithm_update': 0.005067477392595868, 'loss': 0.1323320332557223, 'time_step': 0.005318650672602099, 'init_value': -6.1774139404296875, 'ave_value': -6.933524808007594, 'soft_opc': nan} step=4128




2022-04-20 16:46.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.57 [info     ] FQE_20220420164632: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001696320467217024, 'time_algorithm_update': 0.004762265571328097, 'loss': 0.1481476489530313, 'time_step': 0.005008121562558551, 'init_value': -6.2495317459106445, 'ave_value': -7.109964145394147, 'soft_opc': nan} step=4472




2022-04-20 16:46.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:46.59 [info     ] FQE_20220420164632: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001707180987956912, 'time_algorithm_update': 0.005009268605431845, 'loss': 0.17081428194136986, 'time_step': 0.005254118941551031, 'init_value': -6.815301895141602, 'ave_value': -7.848188649187167, 'soft_opc': nan} step=4816




2022-04-20 16:46.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.01 [info     ] FQE_20220420164632: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017295327297476835, 'time_algorithm_update': 0.005069639793662138, 'loss': 0.1841177733041086, 'time_step': 0.005318833645000015, 'init_value': -7.099306106567383, 'ave_value': -8.100868731961263, 'soft_opc': nan} step=5160




2022-04-20 16:47.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.03 [info     ] FQE_20220420164632: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016835539839988532, 'time_algorithm_update': 0.005027037027270295, 'loss': 0.20854504961996925, 'time_step': 0.00527299490085868, 'init_value': -7.381142616271973, 'ave_value': -8.552660829386348, 'soft_opc': nan} step=5504




2022-04-20 16:47.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.05 [info     ] FQE_20220420164632: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016745855641919514, 'time_algorithm_update': 0.0040000690970309945, 'loss': 0.2272343712284901, 'time_step': 0.004242398711137994, 'init_value': -7.367000579833984, 'ave_value': -8.570961942053232, 'soft_opc': nan} step=5848




2022-04-20 16:47.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.06 [info     ] FQE_20220420164632: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.000164933675943419, 'time_algorithm_update': 0.003487958464511605, 'loss': 0.24818461856176688, 'time_step': 0.003727359134097432, 'init_value': -7.66140079498291, 'ave_value': -8.987080574539482, 'soft_opc': nan} step=6192




2022-04-20 16:47.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.07 [info     ] FQE_20220420164632: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016615972962490347, 'time_algorithm_update': 0.0035601022631623026, 'loss': 0.2741520811836127, 'time_step': 0.0038016597891962806, 'init_value': -7.943238258361816, 'ave_value': -9.35161698958973, 'soft_opc': nan} step=6536




2022-04-20 16:47.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.09 [info     ] FQE_20220420164632: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001626860263735749, 'time_algorithm_update': 0.003135625013085299, 'loss': 0.2825136564801945, 'time_step': 0.0033750139003576236, 'init_value': -7.853113174438477, 'ave_value': -9.35760178198589, 'soft_opc': nan} step=6880




2022-04-20 16:47.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.10 [info     ] FQE_20220420164632: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016254741092060887, 'time_algorithm_update': 0.003459766853687375, 'loss': 0.3016740363957577, 'time_step': 0.0036977286948714147, 'init_value': -8.072762489318848, 'ave_value': -9.671999925716051, 'soft_opc': nan} step=7224




2022-04-20 16:47.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.11 [info     ] FQE_20220420164632: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016568843708481898, 'time_algorithm_update': 0.0035245869048806124, 'loss': 0.324956429672839, 'time_step': 0.0037653626397598622, 'init_value': -8.073591232299805, 'ave_value': -9.710221263850192, 'soft_opc': nan} step=7568




2022-04-20 16:47.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.13 [info     ] FQE_20220420164632: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016736637714297273, 'time_algorithm_update': 0.003582184397897055, 'loss': 0.3391111405230625, 'time_step': 0.0038248092629188713, 'init_value': -8.558012008666992, 'ave_value': -10.198063514011634, 'soft_opc': nan} step=7912




2022-04-20 16:47.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.14 [info     ] FQE_20220420164632: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001659511133681896, 'time_algorithm_update': 0.0035216607326684995, 'loss': 0.36083124215216483, 'time_step': 0.003763336774914764, 'init_value': -8.720287322998047, 'ave_value': -10.408795966620783, 'soft_opc': nan} step=8256




2022-04-20 16:47.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.16 [info     ] FQE_20220420164632: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016789311586424361, 'time_algorithm_update': 0.003522470246913821, 'loss': 0.37520027621974084, 'time_step': 0.0037686665390813073, 'init_value': -9.122464179992676, 'ave_value': -10.952412856557629, 'soft_opc': nan} step=8600




2022-04-20 16:47.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.17 [info     ] FQE_20220420164632: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016567457553952239, 'time_algorithm_update': 0.003502863784169042, 'loss': 0.3839837699684553, 'time_step': 0.003742285939150078, 'init_value': -9.587651252746582, 'ave_value': -11.504433814161414, 'soft_opc': nan} step=8944




2022-04-20 16:47.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.18 [info     ] FQE_20220420164632: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001659213110458019, 'time_algorithm_update': 0.003520607255225958, 'loss': 0.3912446676147019, 'time_step': 0.0037607599136441254, 'init_value': -9.773183822631836, 'ave_value': -11.800874354255695, 'soft_opc': nan} step=9288




2022-04-20 16:47.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.20 [info     ] FQE_20220420164632: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001647479312364445, 'time_algorithm_update': 0.003428805706112884, 'loss': 0.4145584892113368, 'time_step': 0.0036680615225503613, 'init_value': -10.133384704589844, 'ave_value': -12.139710176513564, 'soft_opc': nan} step=9632




2022-04-20 16:47.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.21 [info     ] FQE_20220420164632: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016331949899362963, 'time_algorithm_update': 0.0035423629505689754, 'loss': 0.4338784338960554, 'time_step': 0.003781923027925713, 'init_value': -10.367866516113281, 'ave_value': -12.268045890824197, 'soft_opc': nan} step=9976




2022-04-20 16:47.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.23 [info     ] FQE_20220420164632: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016708776008251102, 'time_algorithm_update': 0.003527353669321814, 'loss': 0.4453171725861382, 'time_step': 0.0037698981373809103, 'init_value': -10.50666332244873, 'ave_value': -12.385149864627989, 'soft_opc': nan} step=10320




2022-04-20 16:47.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.24 [info     ] FQE_20220420164632: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016776489657025005, 'time_algorithm_update': 0.0035315419352331826, 'loss': 0.45468637093273534, 'time_step': 0.0037744301696156345, 'init_value': -10.694171905517578, 'ave_value': -12.532853385894182, 'soft_opc': nan} step=10664




2022-04-20 16:47.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.26 [info     ] FQE_20220420164632: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016324880511261696, 'time_algorithm_update': 0.0035009016824323076, 'loss': 0.4634912375274093, 'time_step': 0.00373881223589875, 'init_value': -10.878602981567383, 'ave_value': -12.568992468688704, 'soft_opc': nan} step=11008




2022-04-20 16:47.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.27 [info     ] FQE_20220420164632: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016406109166699787, 'time_algorithm_update': 0.0034421606119288957, 'loss': 0.48549920285857, 'time_step': 0.003681753956994345, 'init_value': -11.137344360351562, 'ave_value': -12.71671618265708, 'soft_opc': nan} step=11352




2022-04-20 16:47.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.28 [info     ] FQE_20220420164632: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016977897910184638, 'time_algorithm_update': 0.0035086412762486657, 'loss': 0.4971883964973913, 'time_step': 0.0037563664968623674, 'init_value': -11.585908889770508, 'ave_value': -13.099503301075021, 'soft_opc': nan} step=11696




2022-04-20 16:47.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.30 [info     ] FQE_20220420164632: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016455040421596792, 'time_algorithm_update': 0.0035176318745280422, 'loss': 0.5223484355685583, 'time_step': 0.0037555937157120814, 'init_value': -11.892967224121094, 'ave_value': -13.088712802838097, 'soft_opc': nan} step=12040




2022-04-20 16:47.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.31 [info     ] FQE_20220420164632: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016605022341706032, 'time_algorithm_update': 0.00352139251176701, 'loss': 0.5273797814192813, 'time_step': 0.003761859134186146, 'init_value': -12.037949562072754, 'ave_value': -13.25899413329977, 'soft_opc': nan} step=12384




2022-04-20 16:47.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.33 [info     ] FQE_20220420164632: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016789588817330293, 'time_algorithm_update': 0.003556163505066273, 'loss': 0.5470404797125348, 'time_step': 0.0037980079650878906, 'init_value': -12.33580207824707, 'ave_value': -13.389142734713449, 'soft_opc': nan} step=12728




2022-04-20 16:47.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.34 [info     ] FQE_20220420164632: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016715221626814023, 'time_algorithm_update': 0.003547317759935246, 'loss': 0.5499269423497364, 'time_step': 0.0037937386091365373, 'init_value': -12.403600692749023, 'ave_value': -13.48413150641031, 'soft_opc': nan} step=13072




2022-04-20 16:47.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.36 [info     ] FQE_20220420164632: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016591507335041844, 'time_algorithm_update': 0.0035891990328944007, 'loss': 0.5659397801135255, 'time_step': 0.0038332010424414345, 'init_value': -13.189123153686523, 'ave_value': -13.878410591104545, 'soft_opc': nan} step=13416




2022-04-20 16:47.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.37 [info     ] FQE_20220420164632: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016478882279506949, 'time_algorithm_update': 0.003518562677294709, 'loss': 0.5788548806909645, 'time_step': 0.0037593758383462597, 'init_value': -13.429084777832031, 'ave_value': -14.006032571195087, 'soft_opc': nan} step=13760




2022-04-20 16:47.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.38 [info     ] FQE_20220420164632: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016716746396796647, 'time_algorithm_update': 0.003504200730212899, 'loss': 0.5820195511373323, 'time_step': 0.003747967093489891, 'init_value': -13.660953521728516, 'ave_value': -14.101264494284777, 'soft_opc': nan} step=14104




2022-04-20 16:47.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.40 [info     ] FQE_20220420164632: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.000164321688718574, 'time_algorithm_update': 0.003058379472688187, 'loss': 0.5976611630568758, 'time_step': 0.0032963038876999257, 'init_value': -14.107887268066406, 'ave_value': -14.314653269218297, 'soft_opc': nan} step=14448




2022-04-20 16:47.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.41 [info     ] FQE_20220420164632: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016640993051750716, 'time_algorithm_update': 0.003499080968457599, 'loss': 0.6067934661811261, 'time_step': 0.003741751576578894, 'init_value': -14.56075668334961, 'ave_value': -14.313161917951017, 'soft_opc': nan} step=14792




2022-04-20 16:47.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.42 [info     ] FQE_20220420164632: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001652171445447345, 'time_algorithm_update': 0.003568051859389904, 'loss': 0.6134561987886162, 'time_step': 0.003809189380601395, 'init_value': -14.659700393676758, 'ave_value': -14.256634337637943, 'soft_opc': nan} step=15136




2022-04-20 16:47.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.44 [info     ] FQE_20220420164632: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016895976177481718, 'time_algorithm_update': 0.003558361946150314, 'loss': 0.629239232609655, 'time_step': 0.003801939099334007, 'init_value': -14.861380577087402, 'ave_value': -14.225941737386323, 'soft_opc': nan} step=15480




2022-04-20 16:47.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.45 [info     ] FQE_20220420164632: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001664639905441639, 'time_algorithm_update': 0.0036090896573177603, 'loss': 0.6515209501349303, 'time_step': 0.0038505217363668043, 'init_value': -15.548882484436035, 'ave_value': -14.75721995607291, 'soft_opc': nan} step=15824




2022-04-20 16:47.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.47 [info     ] FQE_20220420164632: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016535090845684673, 'time_algorithm_update': 0.003545937843100969, 'loss': 0.6638281121525134, 'time_step': 0.003783976615861405, 'init_value': -15.506834030151367, 'ave_value': -14.536773751553584, 'soft_opc': nan} step=16168




2022-04-20 16:47.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.48 [info     ] FQE_20220420164632: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016611953114354335, 'time_algorithm_update': 0.0035177122714907623, 'loss': 0.6776892339897364, 'time_step': 0.0037598907947540283, 'init_value': -15.938326835632324, 'ave_value': -15.003129657547133, 'soft_opc': nan} step=16512




2022-04-20 16:47.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.50 [info     ] FQE_20220420164632: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001643521841182265, 'time_algorithm_update': 0.0035121149794999943, 'loss': 0.6852536195845798, 'time_step': 0.0037529177443925725, 'init_value': -16.25904655456543, 'ave_value': -15.102377694589714, 'soft_opc': nan} step=16856




2022-04-20 16:47.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.51 [info     ] FQE_20220420164632: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016382128693336664, 'time_algorithm_update': 0.0036036226638527804, 'loss': 0.7003365383039467, 'time_step': 0.003842563823212025, 'init_value': -16.53889274597168, 'ave_value': -15.245226521180893, 'soft_opc': nan} step=17200




2022-04-20 16:47.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164632/model_17200.pt
search iteration:  16
using hyper params:  [0.006745825040519846, 0.005148803635010107, 8.77501226333221e-05, 1]
2022-04-20 16:47.51 [debug    ] RoundIterator is selected.
2022-04-20 16:47.51 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420164751
2022-04-20 16:47.51 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:47.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:47.51 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:47.51 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.006745825040519

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:47.54 [info     ] TD3PlusBC_20220420164751: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003263315959283483, 'time_algorithm_update': 0.006924554618478519, 'critic_loss': 0.36504955097422964, 'actor_loss': 0.06526358582471546, 'time_step': 0.0073285200442487035, 'td_error': 0.8096448992472288, 'init_value': -0.5897201299667358, 'ave_value': 0.12699801850350792} step=342
2022-04-20 16:47.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:47.57 [info     ] TD3PlusBC_20220420164751: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003216643082468133, 'time_algorithm_update': 0.0069258080588446725, 'critic_loss': 0.1677839306145035, 'actor_loss': -0.007416124210545891, 'time_step': 0.007326966837832802, 'td_error': 0.805480035603622, 'init_value': -0.7794572710990906, 'ave_value': 0.2296046472821586} step=684
2022-04-20 16:47.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.00 [info     ] TD3PlusBC_20220420164751: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00032730339563380906, 'time_algorithm_update': 0.006915489135429873, 'critic_loss': 0.21467093803119242, 'actor_loss': -0.01818924654296964, 'time_step': 0.007324875446788052, 'td_error': 0.8010591348785299, 'init_value': -1.0596230030059814, 'ave_value': 0.2704791479827081} step=1026
2022-04-20 16:48.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.03 [info     ] TD3PlusBC_20220420164751: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.000327069159836797, 'time_algorithm_update': 0.006942288220277306, 'critic_loss': 0.28754677769472026, 'actor_loss': 0.011806233889526792, 'time_step': 0.0073499059119419745, 'td_error': 0.8008564227267694, 'init_value': -1.3005746603012085, 'ave_value': 0.3814515187508041} step=1368
2022-04-20 16:48.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.06 [info     ] TD3PlusBC_20220420164751: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00032440403051543657, 'time_algorithm_update': 0.006920991585268612, 'critic_loss': 0.3545262335380267, 'actor_loss': 0.012166089721416173, 'time_step': 0.00732341426157812, 'td_error': 0.8044245789268957, 'init_value': -1.622555136680603, 'ave_value': 0.4405762682677255} step=1710
2022-04-20 16:48.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.09 [info     ] TD3PlusBC_20220420164751: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003265198211223758, 'time_algorithm_update': 0.007044200311627304, 'critic_loss': 0.41973712090511767, 'actor_loss': 0.03417349826784162, 'time_step': 0.007450908945317854, 'td_error': 0.8092509197636855, 'init_value': -1.9367926120758057, 'ave_value': 0.4816929518922677} step=2052
2022-04-20 16:48.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.12 [info     ] TD3PlusBC_20220420164751: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003287443640636422, 'time_algorithm_update': 0.006900302847923591, 'critic_loss': 0.4964348490823779, 'actor_loss': 0.030544693815952154, 'time_step': 0.007309447952181275, 'td_error': 0.816551334440163, 'init_value': -2.2131874561309814, 'ave_value': 0.5534743365684004} step=2394
2022-04-20 16:48.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.15 [info     ] TD3PlusBC_20220420164751: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003228752236617239, 'time_algorithm_update': 0.006881363907752678, 'critic_loss': 0.569422305065986, 'actor_loss': 0.03922447283365573, 'time_step': 0.007283805406581589, 'td_error': 0.828259476416535, 'init_value': -2.477053165435791, 'ave_value': 0.6123440600027171} step=2736
2022-04-20 16:48.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.18 [info     ] TD3PlusBC_20220420164751: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00031701235743293983, 'time_algorithm_update': 0.006805476389433208, 'critic_loss': 0.6548966930536498, 'actor_loss': 0.05093466056979191, 'time_step': 0.0072003198645965395, 'td_error': 0.8422250336348924, 'init_value': -2.8778469562530518, 'ave_value': 0.6736722140184075} step=3078
2022-04-20 16:48.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.20 [info     ] TD3PlusBC_20220420164751: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00029032481344122633, 'time_algorithm_update': 0.0063492010908517225, 'critic_loss': 0.7431297404637114, 'actor_loss': 0.046540326259115285, 'time_step': 0.006709641183328907, 'td_error': 0.8638957500445044, 'init_value': -3.1571314334869385, 'ave_value': 0.7045594061981585} step=3420
2022-04-20 16:48.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.23 [info     ] TD3PlusBC_20220420164751: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0002968485592401516, 'time_algorithm_update': 0.006207260472035548, 'critic_loss': 0.820415625838857, 'actor_loss': 0.04434597803748142, 'time_step': 0.00657431493725693, 'td_error': 0.8796705562122288, 'init_value': -3.499255657196045, 'ave_value': 0.7720990662859031} step=3762
2022-04-20 16:48.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.26 [info     ] TD3PlusBC_20220420164751: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003196482072796738, 'time_algorithm_update': 0.006831574161150302, 'critic_loss': 0.9283126464538407, 'actor_loss': 0.05292131828024373, 'time_step': 0.007233198623211063, 'td_error': 0.8939797178524126, 'init_value': -3.799034833908081, 'ave_value': 0.8080356831216413} step=4104
2022-04-20 16:48.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.29 [info     ] TD3PlusBC_20220420164751: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003315991128397267, 'time_algorithm_update': 0.006867731523792646, 'critic_loss': 1.0068463833882795, 'actor_loss': 0.055307726239600376, 'time_step': 0.007280095278868201, 'td_error': 0.9206447082068064, 'init_value': -4.145861625671387, 'ave_value': 0.8875714735648184} step=4446
2022-04-20 16:48.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.32 [info     ] TD3PlusBC_20220420164751: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003263692409671538, 'time_algorithm_update': 0.006969229519715783, 'critic_loss': 1.0874943177689587, 'actor_loss': 0.057136505929350156, 'time_step': 0.007376902981808311, 'td_error': 0.9506690181088879, 'init_value': -4.475724220275879, 'ave_value': 0.929391237874809} step=4788
2022-04-20 16:48.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.35 [info     ] TD3PlusBC_20220420164751: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003292783659104018, 'time_algorithm_update': 0.00692740657873321, 'critic_loss': 1.2594570235202187, 'actor_loss': 0.05701059540897085, 'time_step': 0.007336996452153077, 'td_error': 0.980121297611599, 'init_value': -4.833906173706055, 'ave_value': 0.9014626473147288} step=5130
2022-04-20 16:48.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.37 [info     ] TD3PlusBC_20220420164751: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00032813995205170925, 'time_algorithm_update': 0.006871147462499072, 'critic_loss': 1.4122120308422903, 'actor_loss': 0.08015995673094577, 'time_step': 0.0072785664720144885, 'td_error': 0.9968567521519491, 'init_value': -5.144837856292725, 'ave_value': 1.0287423879018909} step=5472
2022-04-20 16:48.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.40 [info     ] TD3PlusBC_20220420164751: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00033098633526361474, 'time_algorithm_update': 0.006951057422927945, 'critic_loss': 1.5269567292858983, 'actor_loss': 0.04730516689562658, 'time_step': 0.007363170908208479, 'td_error': 1.0128606396650284, 'init_value': -5.513728141784668, 'ave_value': 1.076321080746297} step=5814
2022-04-20 16:48.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.43 [info     ] TD3PlusBC_20220420164751: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00032328165065475373, 'time_algorithm_update': 0.00701198731249536, 'critic_loss': 1.669493626955657, 'actor_loss': 0.05673462470546801, 'time_step': 0.007412539588080512, 'td_error': 1.0595791260991596, 'init_value': -5.714784622192383, 'ave_value': 1.1229744323161388} step=6156
2022-04-20 16:48.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.46 [info     ] TD3PlusBC_20220420164751: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003245636733651858, 'time_algorithm_update': 0.00690082151290269, 'critic_loss': 1.8783851996151328, 'actor_loss': 0.06490465843973801, 'time_step': 0.007305568421793263, 'td_error': 1.091541389064654, 'init_value': -6.147824764251709, 'ave_value': 1.1527502181034888} step=6498
2022-04-20 16:48.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.49 [info     ] TD3PlusBC_20220420164751: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00032398296378509346, 'time_algorithm_update': 0.006976790595472905, 'critic_loss': 2.0096805954014347, 'actor_loss': 0.05739371798801841, 'time_step': 0.007379539528785393, 'td_error': 1.1095654866790554, 'init_value': -6.483119010925293, 'ave_value': 1.260219479156658} step=6840
2022-04-20 16:48.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.52 [info     ] TD3PlusBC_20220420164751: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003279154760795727, 'time_algorithm_update': 0.007005392459400913, 'critic_loss': 2.210544529888365, 'actor_loss': 0.08887261108696809, 'time_step': 0.007414393954806858, 'td_error': 1.1404616464007695, 'init_value': -6.68657922744751, 'ave_value': 1.3823192465697156} step=7182
2022-04-20 16:48.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.55 [info     ] TD3PlusBC_20220420164751: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003257160298308434, 'time_algorithm_update': 0.006842990367733247, 'critic_loss': 2.4511680158606746, 'actor_loss': 0.06141543333903391, 'time_step': 0.007248741144325301, 'td_error': 1.1851103890370573, 'init_value': -7.153250217437744, 'ave_value': 1.3076994352636708} step=7524
2022-04-20 16:48.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:48.58 [info     ] TD3PlusBC_20220420164751: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032751741465072184, 'time_algorithm_update': 0.006962468749598453, 'critic_loss': 2.6695567429588554, 'actor_loss': 0.04815060091994659, 'time_step': 0.007368861583241245, 'td_error': 1.2229393192179328, 'init_value': -7.374733924865723, 'ave_value': 1.414021004768726} step=7866
2022-04-20 16:48.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.01 [info     ] TD3PlusBC_20220420164751: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00032170334754631536, 'time_algorithm_update': 0.006893748428389343, 'critic_loss': 2.7977863062194914, 'actor_loss': 0.08150517748810394, 'time_step': 0.007294671577319764, 'td_error': 1.235947123144101, 'init_value': -7.7769975662231445, 'ave_value': 1.4906786028857906} step=8208
2022-04-20 16:49.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.05 [info     ] TD3PlusBC_20220420164751: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032871369032831915, 'time_algorithm_update': 0.008963075994748122, 'critic_loss': 3.0053038217170895, 'actor_loss': 0.08034490704623579, 'time_step': 0.009371461226926213, 'td_error': 1.301694720040628, 'init_value': -8.308473587036133, 'ave_value': 1.2644153710025539} step=8550
2022-04-20 16:49.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.08 [info     ] TD3PlusBC_20220420164751: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003324245151720549, 'time_algorithm_update': 0.008938502149972303, 'critic_loss': 3.3000304010527874, 'actor_loss': 0.05275873308293304, 'time_step': 0.009354399658783137, 'td_error': 1.3379726544815316, 'init_value': -8.395174980163574, 'ave_value': 1.523873183826941} step=8892
2022-04-20 16:49.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.12 [info     ] TD3PlusBC_20220420164751: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00031481151692351405, 'time_algorithm_update': 0.008270421920464052, 'critic_loss': 3.4118998317342055, 'actor_loss': 0.07234355900981272, 'time_step': 0.008660251634162768, 'td_error': 1.3575788524992745, 'init_value': -8.922947883605957, 'ave_value': 1.5602247816917427} step=9234
2022-04-20 16:49.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.15 [info     ] TD3PlusBC_20220420164751: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00029683461663318657, 'time_algorithm_update': 0.008460877234475654, 'critic_loss': 3.720496235012311, 'actor_loss': 0.0764170171834572, 'time_step': 0.008828422479462205, 'td_error': 1.4126265235158373, 'init_value': -9.141498565673828, 'ave_value': 1.5693745563843766} step=9576
2022-04-20 16:49.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.19 [info     ] TD3PlusBC_20220420164751: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003290601641113995, 'time_algorithm_update': 0.008930691501550507, 'critic_loss': 3.9354877198300167, 'actor_loss': 0.06806365609691854, 'time_step': 0.00933922313110173, 'td_error': 1.44259058576773, 'init_value': -9.594381332397461, 'ave_value': 1.5798906324775301} step=9918
2022-04-20 16:49.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.23 [info     ] TD3PlusBC_20220420164751: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003328797412894623, 'time_algorithm_update': 0.008932836571632073, 'critic_loss': 4.189833986131768, 'actor_loss': 0.07830925100640944, 'time_step': 0.009348232843722517, 'td_error': 1.488243317823873, 'init_value': -9.883745193481445, 'ave_value': 1.6294052329823911} step=10260
2022-04-20 16:49.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.26 [info     ] TD3PlusBC_20220420164751: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000326940887852719, 'time_algorithm_update': 0.008810729311223616, 'critic_loss': 4.472212287243346, 'actor_loss': 0.07129292266323553, 'time_step': 0.009219088749578821, 'td_error': 1.5400907887919566, 'init_value': -10.28731918334961, 'ave_value': 1.6579982648262972} step=10602
2022-04-20 16:49.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.30 [info     ] TD3PlusBC_20220420164751: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003309682098745603, 'time_algorithm_update': 0.008538703472293609, 'critic_loss': 4.643598579523856, 'actor_loss': 0.06992899626493454, 'time_step': 0.008950677531504492, 'td_error': 1.5860572339771195, 'init_value': -10.462081909179688, 'ave_value': 1.746776106456538} step=10944
2022-04-20 16:49.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.34 [info     ] TD3PlusBC_20220420164751: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00032792105312235873, 'time_algorithm_update': 0.008899819781208596, 'critic_loss': 4.8940663613074005, 'actor_loss': 0.07709363932933724, 'time_step': 0.00930956999460856, 'td_error': 1.6227347419403484, 'init_value': -10.930598258972168, 'ave_value': 1.7743888542167674} step=11286
2022-04-20 16:49.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.37 [info     ] TD3PlusBC_20220420164751: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003321603027700681, 'time_algorithm_update': 0.008827211564047295, 'critic_loss': 5.214468773694066, 'actor_loss': 0.08121380771984134, 'time_step': 0.009241946259437249, 'td_error': 1.6725058173181317, 'init_value': -11.1489839553833, 'ave_value': 1.866754910755936} step=11628
2022-04-20 16:49.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.41 [info     ] TD3PlusBC_20220420164751: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00032872484441389115, 'time_algorithm_update': 0.008607515814708687, 'critic_loss': 5.550516601194415, 'actor_loss': 0.07448404158155124, 'time_step': 0.00901416310092859, 'td_error': 1.7167425115917239, 'init_value': -11.717185020446777, 'ave_value': 1.846322038729994} step=11970
2022-04-20 16:49.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.45 [info     ] TD3PlusBC_20220420164751: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00033274170948050874, 'time_algorithm_update': 0.00886151595422399, 'critic_loss': 5.701336035254406, 'actor_loss': 0.07828721859388882, 'time_step': 0.00927447157296521, 'td_error': 1.7477144003773661, 'init_value': -12.210569381713867, 'ave_value': 1.8366743448893497} step=12312
2022-04-20 16:49.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.48 [info     ] TD3PlusBC_20220420164751: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003230048899064984, 'time_algorithm_update': 0.008427338293421339, 'critic_loss': 6.033471104694389, 'actor_loss': 0.0919418382017236, 'time_step': 0.008832539731298971, 'td_error': 1.795898892932614, 'init_value': -12.640076637268066, 'ave_value': 1.8415695143795598} step=12654
2022-04-20 16:49.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.52 [info     ] TD3PlusBC_20220420164751: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033163327222679093, 'time_algorithm_update': 0.0089178656974034, 'critic_loss': 6.460561820463828, 'actor_loss': 0.08424086642195606, 'time_step': 0.009328469198349624, 'td_error': 1.85401523168423, 'init_value': -12.529560089111328, 'ave_value': 2.0096266618079026} step=12996
2022-04-20 16:49.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.56 [info     ] TD3PlusBC_20220420164751: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003322885747541461, 'time_algorithm_update': 0.009034401492068642, 'critic_loss': 6.704987057119782, 'actor_loss': 0.08984048490287268, 'time_step': 0.009445581519812868, 'td_error': 1.8937457687670227, 'init_value': -13.340261459350586, 'ave_value': 2.0006294835609726} step=13338
2022-04-20 16:49.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.59 [info     ] TD3PlusBC_20220420164751: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00032847457461886935, 'time_algorithm_update': 0.008516797545360543, 'critic_loss': 6.917801969232615, 'actor_loss': 0.07850182535704116, 'time_step': 0.008923879840917755, 'td_error': 1.9421020297980172, 'init_value': -13.534184455871582, 'ave_value': 2.078063220137747} step=13680
2022-04-20 16:49.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.03 [info     ] TD3PlusBC_20220420164751: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00033273892095911573, 'time_algorithm_update': 0.009003717996920759, 'critic_loss': 7.123124007244556, 'actor_loss': 0.08359903164687213, 'time_step': 0.00941772488822714, 'td_error': 1.9683728488894376, 'init_value': -13.760993957519531, 'ave_value': 2.136891641231455} step=14022
2022-04-20 16:50.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.07 [info     ] TD3PlusBC_20220420164751: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003285735671283209, 'time_algorithm_update': 0.008434308202643143, 'critic_loss': 7.4196382586719, 'actor_loss': 0.09716657396645574, 'time_step': 0.008842028372469004, 'td_error': 2.060326300722073, 'init_value': -14.342226028442383, 'ave_value': 2.030711664459878} step=14364
2022-04-20 16:50.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.10 [info     ] TD3PlusBC_20220420164751: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003357393699779845, 'time_algorithm_update': 0.008969276272065459, 'critic_loss': 7.752020537504676, 'actor_loss': 0.10128720671112774, 'time_step': 0.009385504220661364, 'td_error': 2.1149220829111686, 'init_value': -14.529947280883789, 'ave_value': 2.0885742632131015} step=14706
2022-04-20 16:50.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.14 [info     ] TD3PlusBC_20220420164751: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032867046824672767, 'time_algorithm_update': 0.008908708193148786, 'critic_loss': 8.096460262237237, 'actor_loss': 0.0966023441120895, 'time_step': 0.009318125178242287, 'td_error': 2.208184747513512, 'init_value': -14.586584091186523, 'ave_value': 2.2050367086905953} step=15048
2022-04-20 16:50.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.18 [info     ] TD3PlusBC_20220420164751: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003300486949452183, 'time_algorithm_update': 0.008476999070909288, 'critic_loss': 8.28229207218739, 'actor_loss': 0.09210725958671487, 'time_step': 0.0088860737650018, 'td_error': 2.236697050247374, 'init_value': -14.943960189819336, 'ave_value': 2.2762179188331304} step=15390
2022-04-20 16:50.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.21 [info     ] TD3PlusBC_20220420164751: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00033032336430242887, 'time_algorithm_update': 0.008964917813128198, 'critic_loss': 8.768899284259618, 'actor_loss': 0.08670444098132396, 'time_step': 0.009374758653473435, 'td_error': 2.306820585132666, 'init_value': -15.486028671264648, 'ave_value': 2.2351112819417707} step=15732
2022-04-20 16:50.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.25 [info     ] TD3PlusBC_20220420164751: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003268523522984912, 'time_algorithm_update': 0.00873259285040069, 'critic_loss': 8.971050950518826, 'actor_loss': 0.09803965419792292, 'time_step': 0.009140287923534014, 'td_error': 2.369933383418578, 'init_value': -15.782148361206055, 'ave_value': 2.2390662281101084} step=16074
2022-04-20 16:50.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.29 [info     ] TD3PlusBC_20220420164751: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033442039935909515, 'time_algorithm_update': 0.00910402320281804, 'critic_loss': 9.256636985212738, 'actor_loss': 0.10261226565255756, 'time_step': 0.009520166798641807, 'td_error': 2.4041664713203947, 'init_value': -16.371295928955078, 'ave_value': 2.293125573032208} step=16416
2022-04-20 16:50.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.32 [info     ] TD3PlusBC_20220420164751: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032995946226064226, 'time_algorithm_update': 0.008856429691203156, 'critic_loss': 9.510473779419012, 'actor_loss': 0.07681798431695554, 'time_step': 0.009268994916949356, 'td_error': 2.490777878171575, 'init_value': -16.589603424072266, 'ave_value': 2.2890378675353924} step=16758
2022-04-20 16:50.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.36 [info     ] TD3PlusBC_20220420164751: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032845226644772534, 'time_algorithm_update': 0.00846004625510054, 'critic_loss': 9.837986366790638, 'actor_loss': 0.1156750249060971, 'time_step': 0.008868502594574152, 'td_error': 2.5597307480121305, 'init_value': -16.964685440063477, 'ave_value': 2.3369709978362563} step=17100
2022-04-20 16:50.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420164751/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:50.37 [info     ] FQE_20220420165036: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015975050179355116, 'time_algorithm_update': 0.005019331552896155, 'loss': 0.00859457967488701, 'time_step': 0.005253055009497218, 'init_value': -0.22861281037330627, 'ave_value': -0.1941654652084417, 'soft_opc': nan} step=166




2022-04-20 16:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.38 [info     ] FQE_20220420165036: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016418566186744045, 'time_algorithm_update': 0.004972279789936112, 'loss': 0.006318041953785592, 'time_step': 0.005210402500198548, 'init_value': -0.36001068353652954, 'ave_value': -0.25604400793468024, 'soft_opc': nan} step=332




2022-04-20 16:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.39 [info     ] FQE_20220420165036: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016511061105383448, 'time_algorithm_update': 0.005173273833401232, 'loss': 0.005591373265081321, 'time_step': 0.005413321127374488, 'init_value': -0.4374358355998993, 'ave_value': -0.31012252748163566, 'soft_opc': nan} step=498




2022-04-20 16:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.40 [info     ] FQE_20220420165036: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016350056751664863, 'time_algorithm_update': 0.005005488912743258, 'loss': 0.005407218891476472, 'time_step': 0.005238782928650637, 'init_value': -0.49614131450653076, 'ave_value': -0.31964467740953306, 'soft_opc': nan} step=664




2022-04-20 16:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.41 [info     ] FQE_20220420165036: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001625713095607528, 'time_algorithm_update': 0.005040229084980057, 'loss': 0.005000039285710867, 'time_step': 0.005274901907128024, 'init_value': -0.5920383334159851, 'ave_value': -0.3728379250298514, 'soft_opc': nan} step=830




2022-04-20 16:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.42 [info     ] FQE_20220420165036: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001640693250908909, 'time_algorithm_update': 0.005047423293791622, 'loss': 0.0046933116699871885, 'time_step': 0.005285087838230363, 'init_value': -0.6098383665084839, 'ave_value': -0.3866385679041782, 'soft_opc': nan} step=996




2022-04-20 16:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.43 [info     ] FQE_20220420165036: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016376196619976, 'time_algorithm_update': 0.005003177976033774, 'loss': 0.004561072247697556, 'time_step': 0.005237631050937147, 'init_value': -0.6554650068283081, 'ave_value': -0.40253923494486255, 'soft_opc': nan} step=1162




2022-04-20 16:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.44 [info     ] FQE_20220420165036: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016036378331931242, 'time_algorithm_update': 0.004206868539373559, 'loss': 0.00429847085124712, 'time_step': 0.004441509763878512, 'init_value': -0.6940958499908447, 'ave_value': -0.40857690511489503, 'soft_opc': nan} step=1328




2022-04-20 16:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.45 [info     ] FQE_20220420165036: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016280829188335374, 'time_algorithm_update': 0.005021905324545251, 'loss': 0.004005003702989499, 'time_step': 0.005259826958897602, 'init_value': -0.7833573818206787, 'ave_value': -0.47561829403411965, 'soft_opc': nan} step=1494




2022-04-20 16:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.46 [info     ] FQE_20220420165036: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001652240753173828, 'time_algorithm_update': 0.005087480487593685, 'loss': 0.003902184293768088, 'time_step': 0.0053284900734223515, 'init_value': -0.853908121585846, 'ave_value': -0.5225696111802717, 'soft_opc': nan} step=1660




2022-04-20 16:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.47 [info     ] FQE_20220420165036: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016141081430825842, 'time_algorithm_update': 0.005121638975947736, 'loss': 0.0039056311417595448, 'time_step': 0.005358747689120741, 'init_value': -0.9358415007591248, 'ave_value': -0.5706222769930154, 'soft_opc': nan} step=1826




2022-04-20 16:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.48 [info     ] FQE_20220420165036: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016218352030558758, 'time_algorithm_update': 0.004978307758469179, 'loss': 0.00372741368658145, 'time_step': 0.005213565137012896, 'init_value': -1.022202491760254, 'ave_value': -0.6363173958031578, 'soft_opc': nan} step=1992




2022-04-20 16:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.48 [info     ] FQE_20220420165036: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016242050262818853, 'time_algorithm_update': 0.005144988197878182, 'loss': 0.0038999583107890584, 'time_step': 0.005379165511533439, 'init_value': -1.095806360244751, 'ave_value': -0.6780644614081662, 'soft_opc': nan} step=2158




2022-04-20 16:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.49 [info     ] FQE_20220420165036: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001631831548300134, 'time_algorithm_update': 0.005062222480773926, 'loss': 0.00382541472734659, 'time_step': 0.005298999418695289, 'init_value': -1.1495184898376465, 'ave_value': -0.715472441958683, 'soft_opc': nan} step=2324




2022-04-20 16:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.50 [info     ] FQE_20220420165036: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001584205282739846, 'time_algorithm_update': 0.0050885275185826315, 'loss': 0.003916899412056902, 'time_step': 0.005323954375393419, 'init_value': -1.222306728363037, 'ave_value': -0.7659960406085661, 'soft_opc': nan} step=2490




2022-04-20 16:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.51 [info     ] FQE_20220420165036: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016181727489793157, 'time_algorithm_update': 0.005007730909140713, 'loss': 0.004266564354878368, 'time_step': 0.005245903888380671, 'init_value': -1.3596808910369873, 'ave_value': -0.8669050824736153, 'soft_opc': nan} step=2656




2022-04-20 16:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.52 [info     ] FQE_20220420165036: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015922339565782663, 'time_algorithm_update': 0.004042684313762619, 'loss': 0.00426433048090415, 'time_step': 0.004277459110122129, 'init_value': -1.4414918422698975, 'ave_value': -0.9237902573972672, 'soft_opc': nan} step=2822




2022-04-20 16:50.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.53 [info     ] FQE_20220420165036: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016484346734472067, 'time_algorithm_update': 0.0049994867968272015, 'loss': 0.004629247685815825, 'time_step': 0.005236437521785139, 'init_value': -1.4742027521133423, 'ave_value': -0.9413625225022032, 'soft_opc': nan} step=2988




2022-04-20 16:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.54 [info     ] FQE_20220420165036: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001708125493612634, 'time_algorithm_update': 0.005119386925754777, 'loss': 0.004775082959698132, 'time_step': 0.005367882280464632, 'init_value': -1.5753960609436035, 'ave_value': -1.0181322892104183, 'soft_opc': nan} step=3154




2022-04-20 16:50.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.55 [info     ] FQE_20220420165036: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016271206269781273, 'time_algorithm_update': 0.005012805203357375, 'loss': 0.005131354968551636, 'time_step': 0.00524852936526379, 'init_value': -1.66518235206604, 'ave_value': -1.0691536689663792, 'soft_opc': nan} step=3320




2022-04-20 16:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.56 [info     ] FQE_20220420165036: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016966210790427336, 'time_algorithm_update': 0.005036315286015889, 'loss': 0.0054458422938372985, 'time_step': 0.005279289670737393, 'init_value': -1.7650890350341797, 'ave_value': -1.1511604333111831, 'soft_opc': nan} step=3486




2022-04-20 16:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.57 [info     ] FQE_20220420165036: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016914361930755247, 'time_algorithm_update': 0.005046361900237669, 'loss': 0.006137735081320426, 'time_step': 0.005291467689606081, 'init_value': -1.8444112539291382, 'ave_value': -1.195659678764977, 'soft_opc': nan} step=3652




2022-04-20 16:50.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.58 [info     ] FQE_20220420165036: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017004415213343608, 'time_algorithm_update': 0.0050068476113928366, 'loss': 0.005832376491547708, 'time_step': 0.005251475127346544, 'init_value': -1.913601279258728, 'ave_value': -1.2146713723671867, 'soft_opc': nan} step=3818




2022-04-20 16:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:50.59 [info     ] FQE_20220420165036: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016536339219794217, 'time_algorithm_update': 0.005038186728236187, 'loss': 0.006294860569437607, 'time_step': 0.005278798471014184, 'init_value': -1.9630619287490845, 'ave_value': -1.2359629200922477, 'soft_opc': nan} step=3984




2022-04-20 16:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.00 [info     ] FQE_20220420165036: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016772316162844738, 'time_algorithm_update': 0.00509399534708046, 'loss': 0.006816944346906258, 'time_step': 0.005336441189409739, 'init_value': -2.0839576721191406, 'ave_value': -1.32405451993953, 'soft_opc': nan} step=4150




2022-04-20 16:51.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.01 [info     ] FQE_20220420165036: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016389553805431687, 'time_algorithm_update': 0.004518559180110334, 'loss': 0.007350761949713062, 'time_step': 0.004758422633251512, 'init_value': -2.191514492034912, 'ave_value': -1.3707982530464997, 'soft_opc': nan} step=4316




2022-04-20 16:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.02 [info     ] FQE_20220420165036: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001619149403399732, 'time_algorithm_update': 0.004835617111389895, 'loss': 0.007887411338742822, 'time_step': 0.0050735947597457705, 'init_value': -2.284010410308838, 'ave_value': -1.4280972882366931, 'soft_opc': nan} step=4482




2022-04-20 16:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.03 [info     ] FQE_20220420165036: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016174546207290097, 'time_algorithm_update': 0.005087151584855045, 'loss': 0.008519765407236922, 'time_step': 0.005325561546417604, 'init_value': -2.4528818130493164, 'ave_value': -1.5740225502902323, 'soft_opc': nan} step=4648




2022-04-20 16:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.04 [info     ] FQE_20220420165036: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016548403774399356, 'time_algorithm_update': 0.005045586321727339, 'loss': 0.008589244330531907, 'time_step': 0.005286038640033768, 'init_value': -2.5175371170043945, 'ave_value': -1.5869140059628466, 'soft_opc': nan} step=4814




2022-04-20 16:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.05 [info     ] FQE_20220420165036: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001679184925125306, 'time_algorithm_update': 0.00498523338731513, 'loss': 0.009165019988424582, 'time_step': 0.005226498626800905, 'init_value': -2.5918161869049072, 'ave_value': -1.6326525403169898, 'soft_opc': nan} step=4980




2022-04-20 16:51.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.06 [info     ] FQE_20220420165036: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017170877341764518, 'time_algorithm_update': 0.005131504621850439, 'loss': 0.009665549238330224, 'time_step': 0.005379655275000147, 'init_value': -2.699453115463257, 'ave_value': -1.7173160905773575, 'soft_opc': nan} step=5146




2022-04-20 16:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.07 [info     ] FQE_20220420165036: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016584597438214774, 'time_algorithm_update': 0.005142967384981822, 'loss': 0.01046155277409602, 'time_step': 0.005385090069598462, 'init_value': -2.8179657459259033, 'ave_value': -1.7924970792891743, 'soft_opc': nan} step=5312




2022-04-20 16:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.07 [info     ] FQE_20220420165036: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016613753445177194, 'time_algorithm_update': 0.005089453904025526, 'loss': 0.011357974437071034, 'time_step': 0.005331376948988581, 'init_value': -2.8610403537750244, 'ave_value': -1.7992450253182166, 'soft_opc': nan} step=5478




2022-04-20 16:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.08 [info     ] FQE_20220420165036: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001651723700833608, 'time_algorithm_update': 0.0050549420965723246, 'loss': 0.01166836715847568, 'time_step': 0.0052944263779973405, 'init_value': -3.023221969604492, 'ave_value': -1.9274790686530037, 'soft_opc': nan} step=5644




2022-04-20 16:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.09 [info     ] FQE_20220420165036: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016543664127947335, 'time_algorithm_update': 0.0050582038350852135, 'loss': 0.011990490777653653, 'time_step': 0.005301521485110363, 'init_value': -3.076699733734131, 'ave_value': -1.9410034525568958, 'soft_opc': nan} step=5810




2022-04-20 16:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.10 [info     ] FQE_20220420165036: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001617224819688912, 'time_algorithm_update': 0.004428319184176893, 'loss': 0.012762048142872393, 'time_step': 0.004663325217833002, 'init_value': -3.136369466781616, 'ave_value': -1.9683336532532094, 'soft_opc': nan} step=5976




2022-04-20 16:51.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.11 [info     ] FQE_20220420165036: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016342731843511742, 'time_algorithm_update': 0.005012736263045345, 'loss': 0.013180420666663762, 'time_step': 0.005254660744264901, 'init_value': -3.2143304347991943, 'ave_value': -2.0101560746025933, 'soft_opc': nan} step=6142




2022-04-20 16:51.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.12 [info     ] FQE_20220420165036: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016735404370779014, 'time_algorithm_update': 0.005047452018921633, 'loss': 0.01406206021889068, 'time_step': 0.005286143486758313, 'init_value': -3.315293312072754, 'ave_value': -2.1030710643744683, 'soft_opc': nan} step=6308




2022-04-20 16:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.13 [info     ] FQE_20220420165036: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016647649098591632, 'time_algorithm_update': 0.005078420581587826, 'loss': 0.014987630538533005, 'time_step': 0.00532311129282756, 'init_value': -3.362175464630127, 'ave_value': -2.110581619963721, 'soft_opc': nan} step=6474




2022-04-20 16:51.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.14 [info     ] FQE_20220420165036: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016745458166283298, 'time_algorithm_update': 0.005164427929613964, 'loss': 0.015214334178929138, 'time_step': 0.005405575396066688, 'init_value': -3.458643913269043, 'ave_value': -2.1768494312827653, 'soft_opc': nan} step=6640




2022-04-20 16:51.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.15 [info     ] FQE_20220420165036: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016016414366572737, 'time_algorithm_update': 0.0050776780369770095, 'loss': 0.01568954909593428, 'time_step': 0.005310568464807717, 'init_value': -3.5919628143310547, 'ave_value': -2.2753808936825743, 'soft_opc': nan} step=6806




2022-04-20 16:51.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.16 [info     ] FQE_20220420165036: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016928724495761366, 'time_algorithm_update': 0.005066229636410633, 'loss': 0.01629895737593859, 'time_step': 0.00531064602265875, 'init_value': -3.584784984588623, 'ave_value': -2.2399442069836564, 'soft_opc': nan} step=6972




2022-04-20 16:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.17 [info     ] FQE_20220420165036: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00017121900995093655, 'time_algorithm_update': 0.005100691174886313, 'loss': 0.016815447375453246, 'time_step': 0.00535130500793457, 'init_value': -3.7163147926330566, 'ave_value': -2.2897168359710824, 'soft_opc': nan} step=7138




2022-04-20 16:51.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.18 [info     ] FQE_20220420165036: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016522694783038404, 'time_algorithm_update': 0.005064247602439788, 'loss': 0.017928676120095313, 'time_step': 0.005301897784313524, 'init_value': -3.72330379486084, 'ave_value': -2.333854986364777, 'soft_opc': nan} step=7304




2022-04-20 16:51.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.19 [info     ] FQE_20220420165036: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016388979302831442, 'time_algorithm_update': 0.004234249333301222, 'loss': 0.017676093957123225, 'time_step': 0.0044719986168735, 'init_value': -3.7435874938964844, 'ave_value': -2.304410056924229, 'soft_opc': nan} step=7470




2022-04-20 16:51.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.20 [info     ] FQE_20220420165036: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016558313944253577, 'time_algorithm_update': 0.005035841321370688, 'loss': 0.0181607788916767, 'time_step': 0.005276786275656827, 'init_value': -3.75982403755188, 'ave_value': -2.3129998095870556, 'soft_opc': nan} step=7636




2022-04-20 16:51.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.21 [info     ] FQE_20220420165036: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017336046839334877, 'time_algorithm_update': 0.005065156752804676, 'loss': 0.019154019744264478, 'time_step': 0.005315000752368605, 'init_value': -3.878558874130249, 'ave_value': -2.394299877549077, 'soft_opc': nan} step=7802




2022-04-20 16:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.22 [info     ] FQE_20220420165036: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016513359115784428, 'time_algorithm_update': 0.005108251629105534, 'loss': 0.020298737376866615, 'time_step': 0.005347139864082796, 'init_value': -3.9678778648376465, 'ave_value': -2.4507049106262824, 'soft_opc': nan} step=7968




2022-04-20 16:51.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.23 [info     ] FQE_20220420165036: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016814254852662603, 'time_algorithm_update': 0.005014551691262119, 'loss': 0.01951952941129433, 'time_step': 0.005258880465863699, 'init_value': -4.010406017303467, 'ave_value': -2.4998961951013086, 'soft_opc': nan} step=8134




2022-04-20 16:51.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:51.24 [info     ] FQE_20220420165036: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016649516232042428, 'time_algorithm_update': 0.004980433418090085, 'loss': 0.020458957720379603, 'time_step': 0.005218589162252036, 'init_value': -4.031389236450195, 'ave_value': -2.4736881966190833, 'soft_opc': nan} step=8300




2022-04-20 16:51.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165036/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:51.24 [info     ] Directory is created at d3rlpy_logs/FQE_20220420165124
2022-04-20 16:51.24 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:51.24 [debug    ] Building models...
2022-04-20 16:51.24 [debug    ] Models have been built.
2022-04-20 16:51.24 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420165124/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:51.26 [info     ] FQE_20220420165124: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.000167920145877572, 'time_algorithm_update': 0.0050596761149029395, 'loss': 0.0313744968355655, 'time_step': 0.005302570586980775, 'init_value': -0.9960176348686218, 'ave_value': -0.9902421656828206, 'soft_opc': nan} step=344




2022-04-20 16:51.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.28 [info     ] FQE_20220420165124: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016689300537109375, 'time_algorithm_update': 0.004631899817045345, 'loss': 0.025910041194820647, 'time_step': 0.004872444757195406, 'init_value': -1.650682806968689, 'ave_value': -1.6355798951207519, 'soft_opc': nan} step=688




2022-04-20 16:51.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.30 [info     ] FQE_20220420165124: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016820846601974134, 'time_algorithm_update': 0.005029959041018819, 'loss': 0.02814802208233105, 'time_step': 0.005272592916045078, 'init_value': -2.5034821033477783, 'ave_value': -2.4883885083032085, 'soft_opc': nan} step=1032




2022-04-20 16:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.32 [info     ] FQE_20220420165124: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016756806262703829, 'time_algorithm_update': 0.005032907391703406, 'loss': 0.030449142785158093, 'time_step': 0.005276122065477593, 'init_value': -3.099473476409912, 'ave_value': -3.061029066229323, 'soft_opc': nan} step=1376




2022-04-20 16:51.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.34 [info     ] FQE_20220420165124: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016761935034463572, 'time_algorithm_update': 0.005064345376436101, 'loss': 0.03969779327988278, 'time_step': 0.005309721758199292, 'init_value': -3.819064140319824, 'ave_value': -3.736492874958292, 'soft_opc': nan} step=1720




2022-04-20 16:51.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.36 [info     ] FQE_20220420165124: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016746964565543242, 'time_algorithm_update': 0.004948880783347196, 'loss': 0.046005305395526594, 'time_step': 0.005194097064262213, 'init_value': -4.465127944946289, 'ave_value': -4.329695166764754, 'soft_opc': nan} step=2064




2022-04-20 16:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.38 [info     ] FQE_20220420165124: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016715568165446437, 'time_algorithm_update': 0.00490571801052537, 'loss': 0.05527317242664393, 'time_step': 0.005147895840711372, 'init_value': -5.117074489593506, 'ave_value': -4.924328578568089, 'soft_opc': nan} step=2408




2022-04-20 16:51.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.40 [info     ] FQE_20220420165124: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017055106717486714, 'time_algorithm_update': 0.005074609157650969, 'loss': 0.06802629134726039, 'time_step': 0.005323192407918531, 'init_value': -5.784358501434326, 'ave_value': -5.483634709161738, 'soft_opc': nan} step=2752




2022-04-20 16:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.42 [info     ] FQE_20220420165124: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016727073248042616, 'time_algorithm_update': 0.005064206760983134, 'loss': 0.07939031051179438, 'time_step': 0.005309147890224014, 'init_value': -6.144282817840576, 'ave_value': -5.741579189131861, 'soft_opc': nan} step=3096




2022-04-20 16:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.44 [info     ] FQE_20220420165124: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017047274944394133, 'time_algorithm_update': 0.005102144424305405, 'loss': 0.0945054151356047, 'time_step': 0.005350249451260234, 'init_value': -7.0820817947387695, 'ave_value': -6.673122312606895, 'soft_opc': nan} step=3440




2022-04-20 16:51.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.46 [info     ] FQE_20220420165124: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016841638919919035, 'time_algorithm_update': 0.0045547034851340365, 'loss': 0.10699741701530509, 'time_step': 0.004799675109774568, 'init_value': -7.559825897216797, 'ave_value': -7.0942646267981715, 'soft_opc': nan} step=3784




2022-04-20 16:51.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.48 [info     ] FQE_20220420165124: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017336704010187195, 'time_algorithm_update': 0.005110356003739113, 'loss': 0.11841445540749403, 'time_step': 0.005361328984415809, 'init_value': -7.933764457702637, 'ave_value': -7.460446411229015, 'soft_opc': nan} step=4128




2022-04-20 16:51.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.50 [info     ] FQE_20220420165124: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017167038695756778, 'time_algorithm_update': 0.005047293596489485, 'loss': 0.12771774108824863, 'time_step': 0.005295510208883951, 'init_value': -8.354653358459473, 'ave_value': -7.978642818914529, 'soft_opc': nan} step=4472




2022-04-20 16:51.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.52 [info     ] FQE_20220420165124: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001697838306427002, 'time_algorithm_update': 0.0051036227581112885, 'loss': 0.1380844161531693, 'time_step': 0.005348008732463039, 'init_value': -8.886701583862305, 'ave_value': -8.528799994299943, 'soft_opc': nan} step=4816




2022-04-20 16:51.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.54 [info     ] FQE_20220420165124: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001704096794128418, 'time_algorithm_update': 0.005030866972235746, 'loss': 0.1482351147082396, 'time_step': 0.0052778790163439376, 'init_value': -9.286849021911621, 'ave_value': -8.959260220075056, 'soft_opc': nan} step=5160




2022-04-20 16:51.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.55 [info     ] FQE_20220420165124: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016720766244932662, 'time_algorithm_update': 0.004813018926354342, 'loss': 0.15482948463799995, 'time_step': 0.005054075357525847, 'init_value': -9.83521842956543, 'ave_value': -9.593223859907468, 'soft_opc': nan} step=5504




2022-04-20 16:51.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.57 [info     ] FQE_20220420165124: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.000166014876476554, 'time_algorithm_update': 0.005040653916292413, 'loss': 0.1586030716141469, 'time_step': 0.0052819681722064355, 'init_value': -9.829280853271484, 'ave_value': -9.622501632380446, 'soft_opc': nan} step=5848




2022-04-20 16:51.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:51.59 [info     ] FQE_20220420165124: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016819044601085574, 'time_algorithm_update': 0.005023772633352945, 'loss': 0.16330536050981906, 'time_step': 0.00526738166809082, 'init_value': -10.400228500366211, 'ave_value': -10.277856223048907, 'soft_opc': nan} step=6192




2022-04-20 16:51.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.01 [info     ] FQE_20220420165124: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017134464064309763, 'time_algorithm_update': 0.00512652133786401, 'loss': 0.16944148323260422, 'time_step': 0.005374860624934352, 'init_value': -10.678041458129883, 'ave_value': -10.688407861918781, 'soft_opc': nan} step=6536




2022-04-20 16:52.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.03 [info     ] FQE_20220420165124: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016721320706744526, 'time_algorithm_update': 0.004640026841052743, 'loss': 0.17290330572638574, 'time_step': 0.0048842333083929015, 'init_value': -10.808674812316895, 'ave_value': -10.895920708285002, 'soft_opc': nan} step=6880




2022-04-20 16:52.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.05 [info     ] FQE_20220420165124: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016960848209469817, 'time_algorithm_update': 0.005052852769230687, 'loss': 0.17143773196568324, 'time_step': 0.005295488723488741, 'init_value': -10.978618621826172, 'ave_value': -11.352234914180663, 'soft_opc': nan} step=7224




2022-04-20 16:52.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.07 [info     ] FQE_20220420165124: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016864718392837878, 'time_algorithm_update': 0.005080548829810564, 'loss': 0.17425951254445798, 'time_step': 0.005327914343323819, 'init_value': -11.157922744750977, 'ave_value': -11.683969157686361, 'soft_opc': nan} step=7568




2022-04-20 16:52.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.09 [info     ] FQE_20220420165124: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016968818598015365, 'time_algorithm_update': 0.00502658722012542, 'loss': 0.17615766500577676, 'time_step': 0.005276136620100154, 'init_value': -11.490426063537598, 'ave_value': -12.146330710523761, 'soft_opc': nan} step=7912




2022-04-20 16:52.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.11 [info     ] FQE_20220420165124: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001679340074228686, 'time_algorithm_update': 0.005072011504062387, 'loss': 0.17951461761041956, 'time_step': 0.0053150903346926666, 'init_value': -11.699897766113281, 'ave_value': -12.480990560339063, 'soft_opc': nan} step=8256




2022-04-20 16:52.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.13 [info     ] FQE_20220420165124: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016686944074408952, 'time_algorithm_update': 0.004606554674547772, 'loss': 0.18229869458023026, 'time_step': 0.004850544901781304, 'init_value': -11.966545104980469, 'ave_value': -12.782769086144167, 'soft_opc': nan} step=8600




2022-04-20 16:52.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.15 [info     ] FQE_20220420165124: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001690200594968574, 'time_algorithm_update': 0.005130005437274312, 'loss': 0.1866451182489305, 'time_step': 0.005373717740524647, 'init_value': -12.250129699707031, 'ave_value': -13.162269574358245, 'soft_opc': nan} step=8944




2022-04-20 16:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.17 [info     ] FQE_20220420165124: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016924738883972168, 'time_algorithm_update': 0.005048564700193183, 'loss': 0.19025523565861202, 'time_step': 0.005295405554216962, 'init_value': -12.624540328979492, 'ave_value': -13.616424468037597, 'soft_opc': nan} step=9288




2022-04-20 16:52.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.19 [info     ] FQE_20220420165124: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017043393711711085, 'time_algorithm_update': 0.0050672015478444654, 'loss': 0.19293797264676973, 'time_step': 0.005313255759172662, 'init_value': -12.696715354919434, 'ave_value': -13.71250651053474, 'soft_opc': nan} step=9632




2022-04-20 16:52.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.21 [info     ] FQE_20220420165124: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001668070637902548, 'time_algorithm_update': 0.0046373224535653755, 'loss': 0.1941612513984965, 'time_step': 0.004880510097326234, 'init_value': -12.936356544494629, 'ave_value': -14.047252331345199, 'soft_opc': nan} step=9976




2022-04-20 16:52.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.23 [info     ] FQE_20220420165124: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017091077427531398, 'time_algorithm_update': 0.005116695581480514, 'loss': 0.19692447942919855, 'time_step': 0.005363257125366566, 'init_value': -13.293737411499023, 'ave_value': -14.456690213527647, 'soft_opc': nan} step=10320




2022-04-20 16:52.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.25 [info     ] FQE_20220420165124: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017031056936397108, 'time_algorithm_update': 0.0050957570242327315, 'loss': 0.20253603032595197, 'time_step': 0.005346094453057578, 'init_value': -13.38547420501709, 'ave_value': -14.610407238939425, 'soft_opc': nan} step=10664




2022-04-20 16:52.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.27 [info     ] FQE_20220420165124: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017060651335605356, 'time_algorithm_update': 0.005081551712612773, 'loss': 0.20644444430810074, 'time_step': 0.005330710217010143, 'init_value': -13.648883819580078, 'ave_value': -14.946510400147039, 'soft_opc': nan} step=11008




2022-04-20 16:52.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.29 [info     ] FQE_20220420165124: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017073542572731195, 'time_algorithm_update': 0.005113409009090689, 'loss': 0.2118212241127134, 'time_step': 0.005364439515180366, 'init_value': -13.691927909851074, 'ave_value': -14.985531638818584, 'soft_opc': nan} step=11352




2022-04-20 16:52.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.30 [info     ] FQE_20220420165124: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016573833864788677, 'time_algorithm_update': 0.004577523054078568, 'loss': 0.2123392803951838, 'time_step': 0.004817774129468341, 'init_value': -13.946174621582031, 'ave_value': -15.238287421192046, 'soft_opc': nan} step=11696




2022-04-20 16:52.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.32 [info     ] FQE_20220420165124: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016834985378176668, 'time_algorithm_update': 0.0050653863784878754, 'loss': 0.2221889070148558, 'time_step': 0.0053098721559657605, 'init_value': -14.060272216796875, 'ave_value': -15.334346568677784, 'soft_opc': nan} step=12040




2022-04-20 16:52.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.34 [info     ] FQE_20220420165124: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001719975194265676, 'time_algorithm_update': 0.005092114210128784, 'loss': 0.22463513177624622, 'time_step': 0.005341858364814936, 'init_value': -14.211787223815918, 'ave_value': -15.526239980314543, 'soft_opc': nan} step=12384




2022-04-20 16:52.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.36 [info     ] FQE_20220420165124: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016789658125056776, 'time_algorithm_update': 0.005103767611259638, 'loss': 0.23094894509589256, 'time_step': 0.005345472762751025, 'init_value': -14.439237594604492, 'ave_value': -15.922792650956811, 'soft_opc': nan} step=12728




2022-04-20 16:52.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.38 [info     ] FQE_20220420165124: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001664910205574923, 'time_algorithm_update': 0.00464509600816771, 'loss': 0.2416792355997618, 'time_step': 0.004886821258899777, 'init_value': -14.681367874145508, 'ave_value': -16.192971775868095, 'soft_opc': nan} step=13072




2022-04-20 16:52.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.40 [info     ] FQE_20220420165124: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016982680143311967, 'time_algorithm_update': 0.005079843277154967, 'loss': 0.24750327414746376, 'time_step': 0.00532568471376286, 'init_value': -14.841651916503906, 'ave_value': -16.56890648853166, 'soft_opc': nan} step=13416




2022-04-20 16:52.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.42 [info     ] FQE_20220420165124: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016958214515863464, 'time_algorithm_update': 0.005043309788371242, 'loss': 0.2573867606778824, 'time_step': 0.0052899662838425746, 'init_value': -14.843120574951172, 'ave_value': -16.596306416462806, 'soft_opc': nan} step=13760




2022-04-20 16:52.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.44 [info     ] FQE_20220420165124: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017120879749919093, 'time_algorithm_update': 0.005092430253361546, 'loss': 0.26729044520707673, 'time_step': 0.005342687978300937, 'init_value': -15.243016242980957, 'ave_value': -16.953602820203894, 'soft_opc': nan} step=14104




2022-04-20 16:52.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.46 [info     ] FQE_20220420165124: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017336773317913677, 'time_algorithm_update': 0.0050498441208240595, 'loss': 0.2772776335098802, 'time_step': 0.005302657914716144, 'init_value': -15.30826473236084, 'ave_value': -17.087264226112403, 'soft_opc': nan} step=14448




2022-04-20 16:52.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.48 [info     ] FQE_20220420165124: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016687775767126748, 'time_algorithm_update': 0.004634975693946661, 'loss': 0.28337810408908787, 'time_step': 0.004876596290011739, 'init_value': -15.551443099975586, 'ave_value': -17.30032066709922, 'soft_opc': nan} step=14792




2022-04-20 16:52.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.50 [info     ] FQE_20220420165124: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001711575097815935, 'time_algorithm_update': 0.00513523539831472, 'loss': 0.2968459787702751, 'time_step': 0.00538590481114942, 'init_value': -15.91556453704834, 'ave_value': -17.548602588297506, 'soft_opc': nan} step=15136




2022-04-20 16:52.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.52 [info     ] FQE_20220420165124: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017025165779646054, 'time_algorithm_update': 0.005048010238381319, 'loss': 0.30505834079109306, 'time_step': 0.005294038805850717, 'init_value': -15.82596492767334, 'ave_value': -17.524489904592667, 'soft_opc': nan} step=15480




2022-04-20 16:52.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.54 [info     ] FQE_20220420165124: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001725949520288512, 'time_algorithm_update': 0.005109780749609304, 'loss': 0.31167705342184404, 'time_step': 0.005362022061680638, 'init_value': -16.249221801757812, 'ave_value': -17.901088121124893, 'soft_opc': nan} step=15824




2022-04-20 16:52.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.56 [info     ] FQE_20220420165124: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017111107360484989, 'time_algorithm_update': 0.00482412202413692, 'loss': 0.3260007689945226, 'time_step': 0.005072346260381299, 'init_value': -16.59701156616211, 'ave_value': -18.304512958303683, 'soft_opc': nan} step=16168




2022-04-20 16:52.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:52.58 [info     ] FQE_20220420165124: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001712815706119981, 'time_algorithm_update': 0.005136369965797247, 'loss': 0.33297272411792345, 'time_step': 0.005386324815971907, 'init_value': -16.344730377197266, 'ave_value': -18.10483263897718, 'soft_opc': nan} step=16512




2022-04-20 16:52.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:53.00 [info     ] FQE_20220420165124: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001685203507889149, 'time_algorithm_update': 0.005134573509526807, 'loss': 0.3426943480383691, 'time_step': 0.005380858515584191, 'init_value': -16.520973205566406, 'ave_value': -18.058010882663915, 'soft_opc': nan} step=16856




2022-04-20 16:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:53.02 [info     ] FQE_20220420165124: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016829717990963957, 'time_algorithm_update': 0.00511226681775825, 'loss': 0.347252806591758, 'time_step': 0.005360044712244078, 'init_value': -16.458316802978516, 'ave_value': -18.073086680225956, 'soft_opc': nan} step=17200




2022-04-20 16:53.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165124/model_17200.pt
search iteration:  17
using hyper params:  [0.0017206062563915772, 0.0010163184262266985, 6.902659632440241e-05, 5]
2022-04-20 16:53.02 [debug    ] RoundIterator is selected.
2022-04-20 16:53.02 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420165302
2022-04-20 16:53.02 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:53.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:53.02 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:53.02 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.001720606256

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.06 [info     ] TD3PlusBC_20220420165302: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003762928365963941, 'time_algorithm_update': 0.008486025514658432, 'critic_loss': 15.767337522311518, 'actor_loss': 2.7085539695115117, 'time_step': 0.008944776323106553, 'td_error': 0.9819769409892359, 'init_value': -7.8147101402282715, 'ave_value': -4.788526143209279} step=342
2022-04-20 16:53.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.09 [info     ] TD3PlusBC_20220420165302: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003713062632153606, 'time_algorithm_update': 0.008901481042828476, 'critic_loss': 3.2787298333575152, 'actor_loss': 2.565386576959264, 'time_step': 0.00934872153209664, 'td_error': 1.0569699013751443, 'init_value': -11.08851432800293, 'ave_value': -6.900892663309938} step=684
2022-04-20 16:53.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.13 [info     ] TD3PlusBC_20220420165302: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003791998701485974, 'time_algorithm_update': 0.008882984083298354, 'critic_loss': 4.6054281770137315, 'actor_loss': 2.5443844962538336, 'time_step': 0.009344401415328534, 'td_error': 1.261118383420665, 'init_value': -14.72601318359375, 'ave_value': -9.201863807181342} step=1026
2022-04-20 16:53.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.17 [info     ] TD3PlusBC_20220420165302: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.000374740327310841, 'time_algorithm_update': 0.008500800495259246, 'critic_loss': 6.307404204418785, 'actor_loss': 2.536774826328657, 'time_step': 0.008957170603568093, 'td_error': 1.5614560241818305, 'init_value': -18.513408660888672, 'ave_value': -11.545468208126698} step=1368
2022-04-20 16:53.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.20 [info     ] TD3PlusBC_20220420165302: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003741547378183108, 'time_algorithm_update': 0.008815624560529029, 'critic_loss': 8.263518867436906, 'actor_loss': 2.53059506834599, 'time_step': 0.009272775454827917, 'td_error': 1.9160378011562413, 'init_value': -22.04825782775879, 'ave_value': -13.802166332128072} step=1710
2022-04-20 16:53.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.24 [info     ] TD3PlusBC_20220420165302: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003768909744351928, 'time_algorithm_update': 0.008504736493205467, 'critic_loss': 10.523231319516723, 'actor_loss': 2.526841810572217, 'time_step': 0.008956494387130292, 'td_error': 2.323047077442493, 'init_value': -25.590471267700195, 'ave_value': -16.0483495603631} step=2052
2022-04-20 16:53.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.28 [info     ] TD3PlusBC_20220420165302: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003752004333406861, 'time_algorithm_update': 0.00891095295287015, 'critic_loss': 12.870102787575526, 'actor_loss': 2.524988727959973, 'time_step': 0.009361460892080564, 'td_error': 2.8226316175421218, 'init_value': -29.20343017578125, 'ave_value': -18.318444716896156} step=2394
2022-04-20 16:53.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.32 [info     ] TD3PlusBC_20220420165302: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037449493742825694, 'time_algorithm_update': 0.008840853707832202, 'critic_loss': 15.187644096842984, 'actor_loss': 2.523315003043727, 'time_step': 0.009286147809168052, 'td_error': 3.2865107283116046, 'init_value': -32.341880798339844, 'ave_value': -20.349916389049113} step=2736
2022-04-20 16:53.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.35 [info     ] TD3PlusBC_20220420165302: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00037253878967106693, 'time_algorithm_update': 0.008625659329152246, 'critic_loss': 17.697523925736633, 'actor_loss': 2.5228979476014075, 'time_step': 0.009071488129465203, 'td_error': 3.850481464917263, 'init_value': -35.63174819946289, 'ave_value': -22.477533226226086} step=3078
2022-04-20 16:53.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.39 [info     ] TD3PlusBC_20220420165302: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003812877755416067, 'time_algorithm_update': 0.009021148347018058, 'critic_loss': 20.12764505615011, 'actor_loss': 2.5209860076681214, 'time_step': 0.009477144793460244, 'td_error': 4.384206041301968, 'init_value': -38.47135925292969, 'ave_value': -24.337385817219896} step=3420
2022-04-20 16:53.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.42 [info     ] TD3PlusBC_20220420165302: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003796927413048103, 'time_algorithm_update': 0.008847745538455003, 'critic_loss': 22.727763032355504, 'actor_loss': 2.5196298298082853, 'time_step': 0.009304826719719068, 'td_error': 4.953167291277135, 'init_value': -41.614601135253906, 'ave_value': -26.306070287528758} step=3762
2022-04-20 16:53.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.46 [info     ] TD3PlusBC_20220420165302: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00038269319032367905, 'time_algorithm_update': 0.009006050595066003, 'critic_loss': 25.340503397043687, 'actor_loss': 2.520245613410459, 'time_step': 0.009469416406419542, 'td_error': 5.522483122041213, 'init_value': -44.36796569824219, 'ave_value': -28.161830251069638} step=4104
2022-04-20 16:53.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.50 [info     ] TD3PlusBC_20220420165302: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003820183681465729, 'time_algorithm_update': 0.009013527318050987, 'critic_loss': 27.819199707075867, 'actor_loss': 2.5194638361010635, 'time_step': 0.009473047758403578, 'td_error': 6.121345810112182, 'init_value': -47.04148483276367, 'ave_value': -29.945892091078584} step=4446
2022-04-20 16:53.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.54 [info     ] TD3PlusBC_20220420165302: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003760669663635611, 'time_algorithm_update': 0.008491621379963836, 'critic_loss': 30.433164518479018, 'actor_loss': 2.5188696454142967, 'time_step': 0.008946809155202051, 'td_error': 6.6985279019444155, 'init_value': -49.803932189941406, 'ave_value': -31.666539921493833} step=4788
2022-04-20 16:53.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.57 [info     ] TD3PlusBC_20220420165302: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00037656402030186346, 'time_algorithm_update': 0.009015399810166387, 'critic_loss': 32.94169349001165, 'actor_loss': 2.519937009142156, 'time_step': 0.009466171961778786, 'td_error': 7.237226110496472, 'init_value': -52.291748046875, 'ave_value': -33.255308847321984} step=5130
2022-04-20 16:53.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.01 [info     ] TD3PlusBC_20220420165302: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.000377627144082945, 'time_algorithm_update': 0.008946376237255789, 'critic_loss': 35.829970524325006, 'actor_loss': 2.520221205482706, 'time_step': 0.009400291749608446, 'td_error': 7.743571838124061, 'init_value': -54.546417236328125, 'ave_value': -34.788981397593695} step=5472
2022-04-20 16:54.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.05 [info     ] TD3PlusBC_20220420165302: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00038549356293260005, 'time_algorithm_update': 0.008719794234337165, 'critic_loss': 38.79125387626782, 'actor_loss': 2.517525073380498, 'time_step': 0.009181477870160376, 'td_error': 8.300331902247274, 'init_value': -57.034629821777344, 'ave_value': -36.338032226680134} step=5814
2022-04-20 16:54.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.08 [info     ] TD3PlusBC_20220420165302: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003772492994341934, 'time_algorithm_update': 0.00893523609429075, 'critic_loss': 41.88979833167896, 'actor_loss': 2.5202350574627257, 'time_step': 0.009389584524589673, 'td_error': 8.781524942771384, 'init_value': -59.06377029418945, 'ave_value': -37.717735984646424} step=6156
2022-04-20 16:54.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.12 [info     ] TD3PlusBC_20220420165302: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003777101025943868, 'time_algorithm_update': 0.008593930835612336, 'critic_loss': 45.33070748870136, 'actor_loss': 2.519897391224465, 'time_step': 0.00905064462918287, 'td_error': 9.293758974044609, 'init_value': -61.16472244262695, 'ave_value': -39.15368570713796} step=6498
2022-04-20 16:54.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.16 [info     ] TD3PlusBC_20220420165302: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00037822249340035063, 'time_algorithm_update': 0.008960626975834718, 'critic_loss': 48.614070267705195, 'actor_loss': 2.5193333946473415, 'time_step': 0.009417669117799279, 'td_error': 9.649670199131094, 'init_value': -63.442344665527344, 'ave_value': -40.51373033831618} step=6840
2022-04-20 16:54.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.20 [info     ] TD3PlusBC_20220420165302: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003752101931655616, 'time_algorithm_update': 0.008883180674056562, 'critic_loss': 52.16771652824, 'actor_loss': 2.5205509244349966, 'time_step': 0.009339225222492776, 'td_error': 10.188564179022208, 'init_value': -65.23786926269531, 'ave_value': -41.90471749035096} step=7182
2022-04-20 16:54.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.23 [info     ] TD3PlusBC_20220420165302: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003781067697625411, 'time_algorithm_update': 0.008754622866535744, 'critic_loss': 55.75970402098539, 'actor_loss': 2.5208418006785434, 'time_step': 0.009210019780878435, 'td_error': 10.515223563076859, 'init_value': -66.76536560058594, 'ave_value': -42.91005024488168} step=7524
2022-04-20 16:54.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.27 [info     ] TD3PlusBC_20220420165302: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00037839886737845796, 'time_algorithm_update': 0.008898329316524038, 'critic_loss': 59.52831174616228, 'actor_loss': 2.521077949401231, 'time_step': 0.009356690429107488, 'td_error': 11.045778458868858, 'init_value': -69.14937591552734, 'ave_value': -44.3606764012013} step=7866
2022-04-20 16:54.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.31 [info     ] TD3PlusBC_20220420165302: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.000377470986884937, 'time_algorithm_update': 0.008516776631450096, 'critic_loss': 63.40089514102155, 'actor_loss': 2.5202119322548135, 'time_step': 0.008972480980276365, 'td_error': 11.32721182414396, 'init_value': -70.8509292602539, 'ave_value': -45.38138922628594} step=8208
2022-04-20 16:54.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.34 [info     ] TD3PlusBC_20220420165302: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003754116638362059, 'time_algorithm_update': 0.008913666881315889, 'critic_loss': 67.10678080508583, 'actor_loss': 2.52148699760437, 'time_step': 0.00936782848068148, 'td_error': 11.712124313435881, 'init_value': -72.30207824707031, 'ave_value': -46.41966632448615} step=8550
2022-04-20 16:54.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.38 [info     ] TD3PlusBC_20220420165302: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003757783543993855, 'time_algorithm_update': 0.008957031177498444, 'critic_loss': 70.9714186930517, 'actor_loss': 2.521393276794612, 'time_step': 0.009412996950205307, 'td_error': 12.046045069599451, 'init_value': -73.72615814208984, 'ave_value': -47.42904675815581} step=8892
2022-04-20 16:54.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.42 [info     ] TD3PlusBC_20220420165302: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00037926888605307416, 'time_algorithm_update': 0.008706535512243796, 'critic_loss': 74.97712852522643, 'actor_loss': 2.5208135473797895, 'time_step': 0.009163140553480003, 'td_error': 12.427195565369837, 'init_value': -75.41988372802734, 'ave_value': -48.606330445776315} step=9234
2022-04-20 16:54.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.46 [info     ] TD3PlusBC_20220420165302: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003772158371774774, 'time_algorithm_update': 0.008961462137991922, 'critic_loss': 79.092466209367, 'actor_loss': 2.5221274066389654, 'time_step': 0.009419535335741545, 'td_error': 12.81032699420789, 'init_value': -76.97476196289062, 'ave_value': -49.57007098222034} step=9576
2022-04-20 16:54.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.49 [info     ] TD3PlusBC_20220420165302: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00037738314846105743, 'time_algorithm_update': 0.008644368216308236, 'critic_loss': 83.14417224460178, 'actor_loss': 2.5230444021392286, 'time_step': 0.009103579827916552, 'td_error': 12.963307179376393, 'init_value': -77.93067932128906, 'ave_value': -50.23869395466647} step=9918
2022-04-20 16:54.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.53 [info     ] TD3PlusBC_20220420165302: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003781423234103019, 'time_algorithm_update': 0.00892202895984315, 'critic_loss': 87.30467130426774, 'actor_loss': 2.5238312099412172, 'time_step': 0.009382615312498215, 'td_error': 13.11075409895861, 'init_value': -78.9329605102539, 'ave_value': -51.07890415658369} step=10260
2022-04-20 16:54.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.57 [info     ] TD3PlusBC_20220420165302: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037947244811476323, 'time_algorithm_update': 0.008902625033729955, 'critic_loss': 91.7604562190541, 'actor_loss': 2.523468321526957, 'time_step': 0.009360894822237785, 'td_error': 13.4484008172417, 'init_value': -79.94306182861328, 'ave_value': -52.07275382652711} step=10602
2022-04-20 16:54.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.00 [info     ] TD3PlusBC_20220420165302: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003738473033347325, 'time_algorithm_update': 0.008550245162339239, 'critic_loss': 96.01802967584621, 'actor_loss': 2.5248264862082856, 'time_step': 0.009003951535587423, 'td_error': 13.561804635265519, 'init_value': -80.56501770019531, 'ave_value': -52.60453702659086} step=10944
2022-04-20 16:55.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.04 [info     ] TD3PlusBC_20220420165302: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00037225505761932906, 'time_algorithm_update': 0.008968938860976905, 'critic_loss': 100.46053546213965, 'actor_loss': 2.525039661697477, 'time_step': 0.00941711071639033, 'td_error': 13.75682026692522, 'init_value': -82.29830169677734, 'ave_value': -53.633137179037035} step=11286
2022-04-20 16:55.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.08 [info     ] TD3PlusBC_20220420165302: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00037568215041132696, 'time_algorithm_update': 0.008471372531868561, 'critic_loss': 105.00283291465358, 'actor_loss': 2.5251016644706503, 'time_step': 0.008913504449944747, 'td_error': 14.040837325592806, 'init_value': -83.26394653320312, 'ave_value': -54.32869757048752} step=11628
2022-04-20 16:55.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.11 [info     ] TD3PlusBC_20220420165302: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037790669335259334, 'time_algorithm_update': 0.008865576041372199, 'critic_loss': 109.44810307374475, 'actor_loss': 2.526626543691981, 'time_step': 0.009305963739317063, 'td_error': 14.145575080174405, 'init_value': -83.69761657714844, 'ave_value': -54.857588120479775} step=11970
2022-04-20 16:55.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.15 [info     ] TD3PlusBC_20220420165302: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000379608388532672, 'time_algorithm_update': 0.008993604726958693, 'critic_loss': 113.86250412255002, 'actor_loss': 2.525916223637542, 'time_step': 0.00943777644843386, 'td_error': 14.107581890221766, 'init_value': -84.01873779296875, 'ave_value': -55.376321815929735} step=12312
2022-04-20 16:55.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.19 [info     ] TD3PlusBC_20220420165302: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003783089375635337, 'time_algorithm_update': 0.008550795198184007, 'critic_loss': 118.59136445341055, 'actor_loss': 2.5281720496060553, 'time_step': 0.008993911464311923, 'td_error': 14.274250331619474, 'init_value': -85.15336608886719, 'ave_value': -56.06054056019086} step=12654
2022-04-20 16:55.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.23 [info     ] TD3PlusBC_20220420165302: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00037330354166309736, 'time_algorithm_update': 0.009129000686065496, 'critic_loss': 123.3195141351711, 'actor_loss': 2.528301399353652, 'time_step': 0.00956590273226911, 'td_error': 14.354384450604247, 'init_value': -85.89854431152344, 'ave_value': -56.745467056313544} step=12996
2022-04-20 16:55.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.26 [info     ] TD3PlusBC_20220420165302: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003722299609267921, 'time_algorithm_update': 0.008517767950805307, 'critic_loss': 127.94685323614823, 'actor_loss': 2.528564830969649, 'time_step': 0.008955527467337267, 'td_error': 14.373652232193242, 'init_value': -86.092041015625, 'ave_value': -57.20040089680025} step=13338
2022-04-20 16:55.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.30 [info     ] TD3PlusBC_20220420165302: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00038112464704011614, 'time_algorithm_update': 0.009040960791515328, 'critic_loss': 132.59889004802147, 'actor_loss': 2.527454960415935, 'time_step': 0.009487966347856132, 'td_error': 14.51970301783052, 'init_value': -86.70475769042969, 'ave_value': -57.696726748944755} step=13680
2022-04-20 16:55.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.34 [info     ] TD3PlusBC_20220420165302: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.000373834754988464, 'time_algorithm_update': 0.008965949566043609, 'critic_loss': 136.92589276854756, 'actor_loss': 2.5281581404613473, 'time_step': 0.009407399690639206, 'td_error': 14.572221865012953, 'init_value': -87.50575256347656, 'ave_value': -58.44858968146968} step=14022
2022-04-20 16:55.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.37 [info     ] TD3PlusBC_20220420165302: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037151121953774615, 'time_algorithm_update': 0.008530695535983259, 'critic_loss': 141.46092926828484, 'actor_loss': 2.5300856403440064, 'time_step': 0.008966662730389869, 'td_error': 14.483433325968212, 'init_value': -87.782470703125, 'ave_value': -58.84433469525018} step=14364
2022-04-20 16:55.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.41 [info     ] TD3PlusBC_20220420165302: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00037600771028395983, 'time_algorithm_update': 0.0088621691653603, 'critic_loss': 145.6808364600466, 'actor_loss': 2.5308390890645702, 'time_step': 0.009302597296865363, 'td_error': 14.728909645018465, 'init_value': -88.2918930053711, 'ave_value': -59.301318776289854} step=14706
2022-04-20 16:55.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.45 [info     ] TD3PlusBC_20220420165302: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003735475372849849, 'time_algorithm_update': 0.00850132055449904, 'critic_loss': 149.50551395527802, 'actor_loss': 2.5303359185045924, 'time_step': 0.008935470330087762, 'td_error': 14.54098620625615, 'init_value': -88.14427185058594, 'ave_value': -59.47245811302198} step=15048
2022-04-20 16:55.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.48 [info     ] TD3PlusBC_20220420165302: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037998135326898583, 'time_algorithm_update': 0.008887498002303275, 'critic_loss': 153.28193022075453, 'actor_loss': 2.5300237853624665, 'time_step': 0.009332755155730666, 'td_error': 14.51180399828932, 'init_value': -88.50234985351562, 'ave_value': -59.92434046551753} step=15390
2022-04-20 16:55.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.52 [info     ] TD3PlusBC_20220420165302: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037749608357747394, 'time_algorithm_update': 0.00903742215786761, 'critic_loss': 156.64565698724044, 'actor_loss': 2.530922235801206, 'time_step': 0.009483510290670115, 'td_error': 14.553859333557416, 'init_value': -88.92512512207031, 'ave_value': -60.47318329536297} step=15732
2022-04-20 16:55.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.56 [info     ] TD3PlusBC_20220420165302: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003725164814999229, 'time_algorithm_update': 0.008559147516886393, 'critic_loss': 159.8120660614549, 'actor_loss': 2.5307207246970016, 'time_step': 0.008997595101072077, 'td_error': 14.480764958792406, 'init_value': -88.98278045654297, 'ave_value': -60.61751209091781} step=16074
2022-04-20 16:55.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.00 [info     ] TD3PlusBC_20220420165302: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00037786416840135005, 'time_algorithm_update': 0.00892168248606007, 'critic_loss': 162.48922428331878, 'actor_loss': 2.531890574951618, 'time_step': 0.009363628967463622, 'td_error': 14.74065507788289, 'init_value': -90.02532958984375, 'ave_value': -61.315034848206956} step=16416
2022-04-20 16:56.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.03 [info     ] TD3PlusBC_20220420165302: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003761868727834601, 'time_algorithm_update': 0.008618342249017013, 'critic_loss': 164.78674767031308, 'actor_loss': 2.5324351564485426, 'time_step': 0.009060594770643447, 'td_error': 14.767322832320293, 'init_value': -89.79728698730469, 'ave_value': -61.43006425147356} step=16758
2022-04-20 16:56.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.07 [info     ] TD3PlusBC_20220420165302: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003754437318322254, 'time_algorithm_update': 0.008977035332841483, 'critic_loss': 166.89845751042952, 'actor_loss': 2.5329806637345698, 'time_step': 0.009423228035196227, 'td_error': 14.878675492718013, 'init_value': -89.84991455078125, 'ave_value': -61.73159816249005} step=17100
2022-04-20 16:56.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165302/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:56.08 [info     ] FQE_20220420165607: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001707349915102304, 'time_algorithm_update': 0.005122441843331578, 'loss': 0.008000704472469368, 'time_step': 0.005369331463273749, 'init_value': -0.4547044634819031, 'ave_value': -0.4126867841023046, 'soft_opc': nan} step=166




2022-04-20 16:56.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.09 [info     ] FQE_20220420165607: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001671184976416898, 'time_algorithm_update': 0.0050071535340274675, 'loss': 0.005913924662878804, 'time_step': 0.0052487074610698655, 'init_value': -0.5903527140617371, 'ave_value': -0.4989933233271848, 'soft_opc': nan} step=332




2022-04-20 16:56.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.10 [info     ] FQE_20220420165607: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017245706305446396, 'time_algorithm_update': 0.005010907908520067, 'loss': 0.005403034482992826, 'time_step': 0.0052541523094636845, 'init_value': -0.646612286567688, 'ave_value': -0.5125288960095998, 'soft_opc': nan} step=498




2022-04-20 16:56.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.11 [info     ] FQE_20220420165607: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016753932079636907, 'time_algorithm_update': 0.0050541176853409735, 'loss': 0.005182360848748541, 'time_step': 0.0052950195519320935, 'init_value': -0.7283302545547485, 'ave_value': -0.5545111517849807, 'soft_opc': nan} step=664




2022-04-20 16:56.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.12 [info     ] FQE_20220420165607: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016778348440147308, 'time_algorithm_update': 0.004360088382858828, 'loss': 0.004808693111548762, 'time_step': 0.004601633692362222, 'init_value': -0.7689458727836609, 'ave_value': -0.554959379613131, 'soft_opc': nan} step=830




2022-04-20 16:56.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.13 [info     ] FQE_20220420165607: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001728735774396414, 'time_algorithm_update': 0.005161325615572642, 'loss': 0.004433952013860984, 'time_step': 0.005405762109411768, 'init_value': -0.8191359043121338, 'ave_value': -0.5861822664435651, 'soft_opc': nan} step=996




2022-04-20 16:56.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.14 [info     ] FQE_20220420165607: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001719026680452278, 'time_algorithm_update': 0.005073311817215149, 'loss': 0.004289711459472506, 'time_step': 0.0053173375416951, 'init_value': -0.8914704918861389, 'ave_value': -0.6364199237444916, 'soft_opc': nan} step=1162




2022-04-20 16:56.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.15 [info     ] FQE_20220420165607: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00017150626125105893, 'time_algorithm_update': 0.005070914705115628, 'loss': 0.0041449461377741975, 'time_step': 0.005317155137119523, 'init_value': -0.9605887532234192, 'ave_value': -0.6796417772501439, 'soft_opc': nan} step=1328




2022-04-20 16:56.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.16 [info     ] FQE_20220420165607: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00017138848821800877, 'time_algorithm_update': 0.005072735878358404, 'loss': 0.003949200194497335, 'time_step': 0.00531576340457043, 'init_value': -1.0305914878845215, 'ave_value': -0.72733249857619, 'soft_opc': nan} step=1494




2022-04-20 16:56.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.17 [info     ] FQE_20220420165607: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016688438783209008, 'time_algorithm_update': 0.005128915051379836, 'loss': 0.004064292266140187, 'time_step': 0.005371003265840462, 'init_value': -1.1231045722961426, 'ave_value': -0.8047109606633852, 'soft_opc': nan} step=1660




2022-04-20 16:56.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.18 [info     ] FQE_20220420165607: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001686610371233469, 'time_algorithm_update': 0.00508312575788383, 'loss': 0.003973076037967196, 'time_step': 0.005327135683542274, 'init_value': -1.18263840675354, 'ave_value': -0.8364275583074436, 'soft_opc': nan} step=1826




2022-04-20 16:56.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.19 [info     ] FQE_20220420165607: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001652226390608822, 'time_algorithm_update': 0.005038969488029021, 'loss': 0.003870961199520178, 'time_step': 0.005280557885227433, 'init_value': -1.2109073400497437, 'ave_value': -0.847757026580003, 'soft_opc': nan} step=1992




2022-04-20 16:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.19 [info     ] FQE_20220420165607: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001726437763995435, 'time_algorithm_update': 0.005061047622956425, 'loss': 0.004201142398601241, 'time_step': 0.0053076341927769674, 'init_value': -1.3228098154067993, 'ave_value': -0.9228174980025033, 'soft_opc': nan} step=2158




2022-04-20 16:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.20 [info     ] FQE_20220420165607: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001685389553207949, 'time_algorithm_update': 0.004704616155969091, 'loss': 0.004421074874699116, 'time_step': 0.004945212099925581, 'init_value': -1.4384572505950928, 'ave_value': -1.0262145882791227, 'soft_opc': nan} step=2324




2022-04-20 16:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.21 [info     ] FQE_20220420165607: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017165563192712255, 'time_algorithm_update': 0.00479881734733122, 'loss': 0.0045480533714787694, 'time_step': 0.005042907703353698, 'init_value': -1.5298595428466797, 'ave_value': -1.0796789676219494, 'soft_opc': nan} step=2490




2022-04-20 16:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.22 [info     ] FQE_20220420165607: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00017059711088617164, 'time_algorithm_update': 0.005060269171933094, 'loss': 0.005192449862692864, 'time_step': 0.005306219480123864, 'init_value': -1.629712462425232, 'ave_value': -1.1378482817730926, 'soft_opc': nan} step=2656




2022-04-20 16:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.23 [info     ] FQE_20220420165607: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001664736184729151, 'time_algorithm_update': 0.005017668367868446, 'loss': 0.005552637494584343, 'time_step': 0.005258686571236116, 'init_value': -1.7583613395690918, 'ave_value': -1.2385338698287267, 'soft_opc': nan} step=2822




2022-04-20 16:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.24 [info     ] FQE_20220420165607: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00017076084412724138, 'time_algorithm_update': 0.005088300590055534, 'loss': 0.005942218357007053, 'time_step': 0.005332643727222121, 'init_value': -1.8187360763549805, 'ave_value': -1.2683888109954629, 'soft_opc': nan} step=2988




2022-04-20 16:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.25 [info     ] FQE_20220420165607: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00017023517424801746, 'time_algorithm_update': 0.005021721483713173, 'loss': 0.006291095382696669, 'time_step': 0.0052667267351265415, 'init_value': -1.9590067863464355, 'ave_value': -1.376476179714407, 'soft_opc': nan} step=3154




2022-04-20 16:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.26 [info     ] FQE_20220420165607: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016991632530488163, 'time_algorithm_update': 0.005124367863298899, 'loss': 0.0068068903595972135, 'time_step': 0.005369579935648355, 'init_value': -2.1223244667053223, 'ave_value': -1.4935967822936742, 'soft_opc': nan} step=3320




2022-04-20 16:56.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.27 [info     ] FQE_20220420165607: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016950411968920604, 'time_algorithm_update': 0.005137858620609145, 'loss': 0.007315434888566294, 'time_step': 0.005379587770944618, 'init_value': -2.1850967407226562, 'ave_value': -1.519656121516013, 'soft_opc': nan} step=3486




2022-04-20 16:56.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.28 [info     ] FQE_20220420165607: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00017315795622676252, 'time_algorithm_update': 0.0050804758646402015, 'loss': 0.00824033179676654, 'time_step': 0.005326713424131095, 'init_value': -2.2241857051849365, 'ave_value': -1.545946602697845, 'soft_opc': nan} step=3652




2022-04-20 16:56.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.29 [info     ] FQE_20220420165607: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016914074679455124, 'time_algorithm_update': 0.004953041134110416, 'loss': 0.008819156611781478, 'time_step': 0.005194921091378453, 'init_value': -2.44280743598938, 'ave_value': -1.688266266781736, 'soft_opc': nan} step=3818




2022-04-20 16:56.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.30 [info     ] FQE_20220420165607: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016955582492322806, 'time_algorithm_update': 0.004338303244257548, 'loss': 0.008929341078416097, 'time_step': 0.004581564880279173, 'init_value': -2.462615728378296, 'ave_value': -1.675799676886684, 'soft_opc': nan} step=3984




2022-04-20 16:56.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.31 [info     ] FQE_20220420165607: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00017606924815350268, 'time_algorithm_update': 0.005099269280950707, 'loss': 0.009654716874685991, 'time_step': 0.005353060113378318, 'init_value': -2.5577569007873535, 'ave_value': -1.7760889598848046, 'soft_opc': nan} step=4150




2022-04-20 16:56.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.32 [info     ] FQE_20220420165607: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017144737473453385, 'time_algorithm_update': 0.005025129720389125, 'loss': 0.010301144508201718, 'time_step': 0.005273778754544546, 'init_value': -2.6791553497314453, 'ave_value': -1.886784721612259, 'soft_opc': nan} step=4316




2022-04-20 16:56.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.33 [info     ] FQE_20220420165607: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016616769583828478, 'time_algorithm_update': 0.005031966301332037, 'loss': 0.010759942479016462, 'time_step': 0.005271927419915257, 'init_value': -2.776212692260742, 'ave_value': -1.9160441226183293, 'soft_opc': nan} step=4482




2022-04-20 16:56.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.34 [info     ] FQE_20220420165607: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001717935125511813, 'time_algorithm_update': 0.005119931266968508, 'loss': 0.012095333050364202, 'time_step': 0.005365038492593421, 'init_value': -2.893609046936035, 'ave_value': -1.977141115292571, 'soft_opc': nan} step=4648




2022-04-20 16:56.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.35 [info     ] FQE_20220420165607: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016916659941156227, 'time_algorithm_update': 0.0050370391592921985, 'loss': 0.011867102777658322, 'time_step': 0.005282031484397061, 'init_value': -2.9361000061035156, 'ave_value': -2.03036247349705, 'soft_opc': nan} step=4814




2022-04-20 16:56.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.36 [info     ] FQE_20220420165607: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017768503671669098, 'time_algorithm_update': 0.005161002457860005, 'loss': 0.01229694845184336, 'time_step': 0.005411843219435358, 'init_value': -3.0598065853118896, 'ave_value': -2.119088576153458, 'soft_opc': nan} step=4980




2022-04-20 16:56.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.37 [info     ] FQE_20220420165607: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017011309244546546, 'time_algorithm_update': 0.005053120923329549, 'loss': 0.013412800051569265, 'time_step': 0.005298312888087997, 'init_value': -3.175241470336914, 'ave_value': -2.191014244286595, 'soft_opc': nan} step=5146




2022-04-20 16:56.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.38 [info     ] FQE_20220420165607: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016681688377656132, 'time_algorithm_update': 0.005039361586053687, 'loss': 0.013661268641274557, 'time_step': 0.0052805665027664365, 'init_value': -3.25410795211792, 'ave_value': -2.233423521829484, 'soft_opc': nan} step=5312




2022-04-20 16:56.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.38 [info     ] FQE_20220420165607: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016057634928140296, 'time_algorithm_update': 0.003214681004903403, 'loss': 0.014124507477215255, 'time_step': 0.0034433603286743164, 'init_value': -3.2700042724609375, 'ave_value': -2.2291086974318834, 'soft_opc': nan} step=5478




2022-04-20 16:56.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.39 [info     ] FQE_20220420165607: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016546105763998376, 'time_algorithm_update': 0.0035602127213075936, 'loss': 0.015192734510273146, 'time_step': 0.0038029242710894847, 'init_value': -3.4522383213043213, 'ave_value': -2.355142327740386, 'soft_opc': nan} step=5644




2022-04-20 16:56.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.40 [info     ] FQE_20220420165607: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016574830894010612, 'time_algorithm_update': 0.003527974507894861, 'loss': 0.015918156615997024, 'time_step': 0.003768765782735434, 'init_value': -3.5536081790924072, 'ave_value': -2.437115252642213, 'soft_opc': nan} step=5810




2022-04-20 16:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.40 [info     ] FQE_20220420165607: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016696912696562618, 'time_algorithm_update': 0.003518342971801758, 'loss': 0.01637596512390076, 'time_step': 0.0037576419761381954, 'init_value': -3.6508371829986572, 'ave_value': -2.500849037157657, 'soft_opc': nan} step=5976




2022-04-20 16:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.41 [info     ] FQE_20220420165607: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016551850790000823, 'time_algorithm_update': 0.0035560590675078243, 'loss': 0.017102033554868078, 'time_step': 0.003797921789697854, 'init_value': -3.7747721672058105, 'ave_value': -2.59753087428776, 'soft_opc': nan} step=6142




2022-04-20 16:56.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.42 [info     ] FQE_20220420165607: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001660714666527438, 'time_algorithm_update': 0.0035979575421436726, 'loss': 0.01785516648424273, 'time_step': 0.0038382274558745235, 'init_value': -3.896489143371582, 'ave_value': -2.6321964846303065, 'soft_opc': nan} step=6308




2022-04-20 16:56.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.43 [info     ] FQE_20220420165607: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016374903989125448, 'time_algorithm_update': 0.0035523879958922603, 'loss': 0.018883592924608075, 'time_step': 0.003786961716341685, 'init_value': -4.012533187866211, 'ave_value': -2.715085019110828, 'soft_opc': nan} step=6474




2022-04-20 16:56.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.43 [info     ] FQE_20220420165607: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016041692480983505, 'time_algorithm_update': 0.003419145044074001, 'loss': 0.019657941622777383, 'time_step': 0.0036497690591467432, 'init_value': -4.072486877441406, 'ave_value': -2.7278789890040684, 'soft_opc': nan} step=6640




2022-04-20 16:56.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.44 [info     ] FQE_20220420165607: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016585171940815017, 'time_algorithm_update': 0.003493237208171063, 'loss': 0.02013236088383696, 'time_step': 0.0037311574062669135, 'init_value': -4.1863861083984375, 'ave_value': -2.7987008279139127, 'soft_opc': nan} step=6806




2022-04-20 16:56.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.45 [info     ] FQE_20220420165607: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016691311296210232, 'time_algorithm_update': 0.0034891797835568347, 'loss': 0.02082121359459293, 'time_step': 0.003733015922178705, 'init_value': -4.231308460235596, 'ave_value': -2.7709673217220887, 'soft_opc': nan} step=6972




2022-04-20 16:56.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.45 [info     ] FQE_20220420165607: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016531599573342196, 'time_algorithm_update': 0.0035421747759164096, 'loss': 0.022215596447889525, 'time_step': 0.0037787721817751966, 'init_value': -4.383822917938232, 'ave_value': -2.8803313718722747, 'soft_opc': nan} step=7138




2022-04-20 16:56.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.46 [info     ] FQE_20220420165607: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016543089625347093, 'time_algorithm_update': 0.0035373906055128717, 'loss': 0.022180304467027265, 'time_step': 0.003773886037160115, 'init_value': -4.435673713684082, 'ave_value': -2.937992631966198, 'soft_opc': nan} step=7304




2022-04-20 16:56.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.47 [info     ] FQE_20220420165607: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016469122415565583, 'time_algorithm_update': 0.0034794017493006693, 'loss': 0.023319571668910515, 'time_step': 0.003715285335678652, 'init_value': -4.518271446228027, 'ave_value': -2.976165268147314, 'soft_opc': nan} step=7470




2022-04-20 16:56.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.47 [info     ] FQE_20220420165607: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016695476440062006, 'time_algorithm_update': 0.003437866647559476, 'loss': 0.02330932905616979, 'time_step': 0.0036811211022985987, 'init_value': -4.557020664215088, 'ave_value': -2.9383728170247227, 'soft_opc': nan} step=7636




2022-04-20 16:56.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.48 [info     ] FQE_20220420165607: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016512640987534122, 'time_algorithm_update': 0.0036111395043062875, 'loss': 0.023604691295087875, 'time_step': 0.0038498280996299653, 'init_value': -4.548664569854736, 'ave_value': -2.91091405337861, 'soft_opc': nan} step=7802




2022-04-20 16:56.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.49 [info     ] FQE_20220420165607: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001630826168749706, 'time_algorithm_update': 0.0035552648176629858, 'loss': 0.02457425743131057, 'time_step': 0.0037930054836962596, 'init_value': -4.632541656494141, 'ave_value': -2.9879642639476973, 'soft_opc': nan} step=7968




2022-04-20 16:56.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.50 [info     ] FQE_20220420165607: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001645289271710867, 'time_algorithm_update': 0.0035786513822624482, 'loss': 0.02475494224681934, 'time_step': 0.0038137407187955924, 'init_value': -4.638121604919434, 'ave_value': -2.9707010339056183, 'soft_opc': nan} step=8134




2022-04-20 16:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.50 [info     ] FQE_20220420165607: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016482479601021273, 'time_algorithm_update': 0.003489985523453678, 'loss': 0.025343546169075037, 'time_step': 0.0037267050111150168, 'init_value': -4.683067798614502, 'ave_value': -2.977597589904929, 'soft_opc': nan} step=8300




2022-04-20 16:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165607/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 16:56.51 [info     ] Directory is created at d3rlpy_logs/FQE_20220420165651
2022-04-20 16:56.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:56.51 [debug    ] Building models...
2022-04-20 16:56.51 [debug    ] Models have been built.
2022-04-20 16:56.51 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420165651/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:56.52 [info     ] FQE_20220420165651: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016329732052115507, 'time_algorithm_update': 0.0035145338191542516, 'loss': 0.028675796382865586, 'time_step': 0.003751373568246531, 'init_value': -1.3004467487335205, 'ave_value': -1.2870361209318444, 'soft_opc': nan} step=344




2022-04-20 16:56.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:56.53 [info     ] FQE_20220420165651: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001608313516128895, 'time_algorithm_update': 0.0034878607406172643, 'loss': 0.02558270814222132, 'time_step': 0.0037210008432698805, 'init_value': -2.0721263885498047, 'ave_value': -2.0097894197850077, 'soft_opc': nan} step=688




2022-04-20 16:56.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:56.55 [info     ] FQE_20220420165651: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001658610133237617, 'time_algorithm_update': 0.0035211832024330315, 'loss': 0.029100876526235667, 'time_step': 0.0037605464458465576, 'init_value': -3.086747169494629, 'ave_value': -2.9408242843843797, 'soft_opc': nan} step=1032




2022-04-20 16:56.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:56.56 [info     ] FQE_20220420165651: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001653730869293213, 'time_algorithm_update': 0.003513808860335239, 'loss': 0.032287243994624286, 'time_step': 0.0037549345992332283, 'init_value': -3.7697885036468506, 'ave_value': -3.5721764095748463, 'soft_opc': nan} step=1376




2022-04-20 16:56.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:56.58 [info     ] FQE_20220420165651: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001633347466934559, 'time_algorithm_update': 0.0035122078518534817, 'loss': 0.041393555473362986, 'time_step': 0.003746428461961968, 'init_value': -4.615216255187988, 'ave_value': -4.353516339847104, 'soft_opc': nan} step=1720




2022-04-20 16:56.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:56.59 [info     ] FQE_20220420165651: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001658610133237617, 'time_algorithm_update': 0.0035061586734860444, 'loss': 0.04867674858136059, 'time_step': 0.0037465899489646736, 'init_value': -5.326515197753906, 'ave_value': -5.050293381599057, 'soft_opc': nan} step=2064




2022-04-20 16:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.00 [info     ] FQE_20220420165651: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001669928084972293, 'time_algorithm_update': 0.003553310799044232, 'loss': 0.06036310989767053, 'time_step': 0.0037948308989059092, 'init_value': -6.132763862609863, 'ave_value': -5.877522407110879, 'soft_opc': nan} step=2408




2022-04-20 16:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.02 [info     ] FQE_20220420165651: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016858896543813306, 'time_algorithm_update': 0.0035512080026227372, 'loss': 0.07456612133767543, 'time_step': 0.0037961359633955846, 'init_value': -6.753853797912598, 'ave_value': -6.538813808293298, 'soft_opc': nan} step=2752




2022-04-20 16:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.03 [info     ] FQE_20220420165651: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016884679018064986, 'time_algorithm_update': 0.0035031597281611243, 'loss': 0.08812563635170632, 'time_step': 0.003747802141100861, 'init_value': -7.420302391052246, 'ave_value': -7.258168539586309, 'soft_opc': nan} step=3096




2022-04-20 16:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.05 [info     ] FQE_20220420165651: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001669837984927865, 'time_algorithm_update': 0.0035656052966450535, 'loss': 0.10149596863274657, 'time_step': 0.0038104979104773944, 'init_value': -8.032681465148926, 'ave_value': -8.03119261134353, 'soft_opc': nan} step=3440




2022-04-20 16:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.06 [info     ] FQE_20220420165651: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016601834186287813, 'time_algorithm_update': 0.003583901843359304, 'loss': 0.11407898141201152, 'time_step': 0.003825476696324903, 'init_value': -8.444674491882324, 'ave_value': -8.565998807248107, 'soft_opc': nan} step=3784




2022-04-20 16:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.08 [info     ] FQE_20220420165651: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001685598561930102, 'time_algorithm_update': 0.003477487453194552, 'loss': 0.12834401444402024, 'time_step': 0.0037190803261690363, 'init_value': -8.959796905517578, 'ave_value': -9.192154907621024, 'soft_opc': nan} step=4128




2022-04-20 16:57.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.09 [info     ] FQE_20220420165651: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017008947771649028, 'time_algorithm_update': 0.003546303094819535, 'loss': 0.13977539529049293, 'time_step': 0.0037924806739008704, 'init_value': -9.42049789428711, 'ave_value': -9.852774586759692, 'soft_opc': nan} step=4472




2022-04-20 16:57.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.10 [info     ] FQE_20220420165651: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016545001850571742, 'time_algorithm_update': 0.003507975922074429, 'loss': 0.15297693491176984, 'time_step': 0.0037458227124325064, 'init_value': -9.929386138916016, 'ave_value': -10.580311543302326, 'soft_opc': nan} step=4816




2022-04-20 16:57.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.12 [info     ] FQE_20220420165651: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016402435857196186, 'time_algorithm_update': 0.0034901042317235192, 'loss': 0.1638428054988211, 'time_step': 0.003728358551513317, 'init_value': -10.214599609375, 'ave_value': -11.078113062710182, 'soft_opc': nan} step=5160




2022-04-20 16:57.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.13 [info     ] FQE_20220420165651: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016467446504637252, 'time_algorithm_update': 0.0030985245870989424, 'loss': 0.172106986091104, 'time_step': 0.003333858279294746, 'init_value': -10.927724838256836, 'ave_value': -12.043647163072684, 'soft_opc': nan} step=5504




2022-04-20 16:57.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.15 [info     ] FQE_20220420165651: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016672250836394554, 'time_algorithm_update': 0.0034863290398619894, 'loss': 0.18461535019843384, 'time_step': 0.0037273612133292265, 'init_value': -11.231359481811523, 'ave_value': -12.60701661359095, 'soft_opc': nan} step=5848




2022-04-20 16:57.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.16 [info     ] FQE_20220420165651: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001688821371211562, 'time_algorithm_update': 0.003585223541703335, 'loss': 0.18967004164750145, 'time_step': 0.0038275510765785393, 'init_value': -11.44082260131836, 'ave_value': -13.084766134017604, 'soft_opc': nan} step=6192




2022-04-20 16:57.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.17 [info     ] FQE_20220420165651: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016741004101065703, 'time_algorithm_update': 0.003569533658582111, 'loss': 0.2000177662527232, 'time_step': 0.003814652908680051, 'init_value': -11.92343521118164, 'ave_value': -13.959091431623207, 'soft_opc': nan} step=6536




2022-04-20 16:57.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.19 [info     ] FQE_20220420165651: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001669588477112526, 'time_algorithm_update': 0.0035660578760989877, 'loss': 0.2048700534532843, 'time_step': 0.003806746976320134, 'init_value': -12.129984855651855, 'ave_value': -14.477298461953032, 'soft_opc': nan} step=6880




2022-04-20 16:57.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.20 [info     ] FQE_20220420165651: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016887174096218374, 'time_algorithm_update': 0.003562372784281886, 'loss': 0.21327043004160703, 'time_step': 0.0038057641927586043, 'init_value': -12.592256546020508, 'ave_value': -15.269762379003831, 'soft_opc': nan} step=7224




2022-04-20 16:57.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.22 [info     ] FQE_20220420165651: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016810034596642784, 'time_algorithm_update': 0.003468567548796188, 'loss': 0.22071387103303922, 'time_step': 0.003710909638293954, 'init_value': -12.595126152038574, 'ave_value': -15.731023780978495, 'soft_opc': nan} step=7568




2022-04-20 16:57.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.23 [info     ] FQE_20220420165651: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016800400822661644, 'time_algorithm_update': 0.00355976473453433, 'loss': 0.22496417027270033, 'time_step': 0.0038005016570867496, 'init_value': -12.908736228942871, 'ave_value': -16.346330802086616, 'soft_opc': nan} step=7912




2022-04-20 16:57.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.25 [info     ] FQE_20220420165651: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016704548236935637, 'time_algorithm_update': 0.00351968061092288, 'loss': 0.22826929810608543, 'time_step': 0.0037638891574948334, 'init_value': -12.937438011169434, 'ave_value': -16.74100318543481, 'soft_opc': nan} step=8256




2022-04-20 16:57.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.26 [info     ] FQE_20220420165651: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016548051390536997, 'time_algorithm_update': 0.0035157709620719734, 'loss': 0.22887144496577772, 'time_step': 0.0037537251794060996, 'init_value': -13.06294059753418, 'ave_value': -17.283907917737626, 'soft_opc': nan} step=8600




2022-04-20 16:57.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.27 [info     ] FQE_20220420165651: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016587903333264729, 'time_algorithm_update': 0.0035494704579198083, 'loss': 0.22857111539735003, 'time_step': 0.0037905636221863504, 'init_value': -13.187789916992188, 'ave_value': -17.727838111196032, 'soft_opc': nan} step=8944




2022-04-20 16:57.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.29 [info     ] FQE_20220420165651: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016568289246670034, 'time_algorithm_update': 0.0035528166349544084, 'loss': 0.23004658356698793, 'time_step': 0.0037958005139994066, 'init_value': -13.232596397399902, 'ave_value': -18.070067092994762, 'soft_opc': nan} step=9288




2022-04-20 16:57.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.30 [info     ] FQE_20220420165651: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016573972480241642, 'time_algorithm_update': 0.003530740737915039, 'loss': 0.22595715293725735, 'time_step': 0.0037723010362580764, 'init_value': -13.016767501831055, 'ave_value': -18.27690972890516, 'soft_opc': nan} step=9632




2022-04-20 16:57.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.32 [info     ] FQE_20220420165651: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016700320465620175, 'time_algorithm_update': 0.0034965775733770328, 'loss': 0.22087605055529885, 'time_step': 0.003738693026609199, 'init_value': -12.932807922363281, 'ave_value': -18.5912342342611, 'soft_opc': nan} step=9976




2022-04-20 16:57.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.33 [info     ] FQE_20220420165651: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001673171686571698, 'time_algorithm_update': 0.0035100579261779785, 'loss': 0.22890225499002045, 'time_step': 0.003752862298211386, 'init_value': -13.204339027404785, 'ave_value': -19.236578182627756, 'soft_opc': nan} step=10320




2022-04-20 16:57.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.35 [info     ] FQE_20220420165651: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001693305581115013, 'time_algorithm_update': 0.003539260043654331, 'loss': 0.230000096486968, 'time_step': 0.003785533960475478, 'init_value': -13.415515899658203, 'ave_value': -19.764792147777587, 'soft_opc': nan} step=10664




2022-04-20 16:57.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.36 [info     ] FQE_20220420165651: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016688122305759165, 'time_algorithm_update': 0.003551510877387468, 'loss': 0.23066691576156678, 'time_step': 0.003793274940446366, 'init_value': -13.379557609558105, 'ave_value': -20.056997688681513, 'soft_opc': nan} step=11008




2022-04-20 16:57.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.37 [info     ] FQE_20220420165651: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001687781755314317, 'time_algorithm_update': 0.0035228389640187107, 'loss': 0.23350624781275212, 'time_step': 0.0037686256475226824, 'init_value': -13.497329711914062, 'ave_value': -20.565980179383008, 'soft_opc': nan} step=11352




2022-04-20 16:57.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.39 [info     ] FQE_20220420165651: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016949273819147156, 'time_algorithm_update': 0.003554722597432691, 'loss': 0.235534040544312, 'time_step': 0.003799405901931053, 'init_value': -13.370946884155273, 'ave_value': -20.68111141880844, 'soft_opc': nan} step=11696




2022-04-20 16:57.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.40 [info     ] FQE_20220420165651: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016730192095734352, 'time_algorithm_update': 0.003532201051712036, 'loss': 0.24265434684470122, 'time_step': 0.00377406214558801, 'init_value': -13.543286323547363, 'ave_value': -21.126541614396547, 'soft_opc': nan} step=12040




2022-04-20 16:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.42 [info     ] FQE_20220420165651: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001692023388175077, 'time_algorithm_update': 0.0036362451176310696, 'loss': 0.24476062901166462, 'time_step': 0.003882969534674356, 'init_value': -13.689369201660156, 'ave_value': -21.583529822482006, 'soft_opc': nan} step=12384




2022-04-20 16:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.43 [info     ] FQE_20220420165651: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016559071319047794, 'time_algorithm_update': 0.0035623575365820595, 'loss': 0.25244308486689143, 'time_step': 0.003801763750786005, 'init_value': -13.839110374450684, 'ave_value': -21.98342357784707, 'soft_opc': nan} step=12728




2022-04-20 16:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.45 [info     ] FQE_20220420165651: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016555120778638264, 'time_algorithm_update': 0.003524854432704837, 'loss': 0.26572692141391685, 'time_step': 0.0037652212519978367, 'init_value': -14.221964836120605, 'ave_value': -22.56176207829099, 'soft_opc': nan} step=13072




2022-04-20 16:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.46 [info     ] FQE_20220420165651: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016678072685419128, 'time_algorithm_update': 0.0035296983497087346, 'loss': 0.27633825279663987, 'time_step': 0.003771106864130774, 'init_value': -14.137158393859863, 'ave_value': -22.749138137953448, 'soft_opc': nan} step=13416




2022-04-20 16:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.47 [info     ] FQE_20220420165651: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016805113748062487, 'time_algorithm_update': 0.003606505172197209, 'loss': 0.2816414103821613, 'time_step': 0.003851288972898971, 'init_value': -14.412206649780273, 'ave_value': -23.256468646932156, 'soft_opc': nan} step=13760




2022-04-20 16:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.49 [info     ] FQE_20220420165651: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016972076061160066, 'time_algorithm_update': 0.003598214581955311, 'loss': 0.2940598258255916, 'time_step': 0.0038449299889941548, 'init_value': -14.787396430969238, 'ave_value': -23.688689482024333, 'soft_opc': nan} step=14104




2022-04-20 16:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.50 [info     ] FQE_20220420165651: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001686402531557305, 'time_algorithm_update': 0.003519161496051522, 'loss': 0.3036848801123195, 'time_step': 0.0037640062875525897, 'init_value': -14.761173248291016, 'ave_value': -23.727095785072947, 'soft_opc': nan} step=14448




2022-04-20 16:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.52 [info     ] FQE_20220420165651: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016389891158702762, 'time_algorithm_update': 0.0035276253556096276, 'loss': 0.31187352645319216, 'time_step': 0.003766402948734372, 'init_value': -14.906688690185547, 'ave_value': -23.940381945062192, 'soft_opc': nan} step=14792




2022-04-20 16:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.53 [info     ] FQE_20220420165651: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001662865627643674, 'time_algorithm_update': 0.0035355118818061297, 'loss': 0.3231410666336414, 'time_step': 0.003776146922000619, 'init_value': -15.207599639892578, 'ave_value': -24.385978751840607, 'soft_opc': nan} step=15136




2022-04-20 16:57.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.55 [info     ] FQE_20220420165651: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016900897026062012, 'time_algorithm_update': 0.0036125453405602033, 'loss': 0.33996959809257193, 'time_step': 0.003855908332869064, 'init_value': -15.416500091552734, 'ave_value': -24.576839541798975, 'soft_opc': nan} step=15480




2022-04-20 16:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.56 [info     ] FQE_20220420165651: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016695746155672296, 'time_algorithm_update': 0.0035234280796938165, 'loss': 0.34488845885958697, 'time_step': 0.003765497789826504, 'init_value': -15.68450927734375, 'ave_value': -24.86156689538783, 'soft_opc': nan} step=15824




2022-04-20 16:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.57 [info     ] FQE_20220420165651: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016900065333344216, 'time_algorithm_update': 0.0034965796526088273, 'loss': 0.35330833689320484, 'time_step': 0.00374117840168088, 'init_value': -15.868715286254883, 'ave_value': -25.204564764478604, 'soft_opc': nan} step=16168




2022-04-20 16:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:57.59 [info     ] FQE_20220420165651: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016506120216014774, 'time_algorithm_update': 0.0034766148689181304, 'loss': 0.35827467921677286, 'time_step': 0.003714041654453721, 'init_value': -15.981388092041016, 'ave_value': -25.489784664328436, 'soft_opc': nan} step=16512




2022-04-20 16:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:58.00 [info     ] FQE_20220420165651: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001659116079640943, 'time_algorithm_update': 0.003330756758534631, 'loss': 0.3875569801095353, 'time_step': 0.003570659216060195, 'init_value': -16.223237991333008, 'ave_value': -25.769957312048824, 'soft_opc': nan} step=16856




2022-04-20 16:58.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:58.02 [info     ] FQE_20220420165651: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016972076061160066, 'time_algorithm_update': 0.003491537515507188, 'loss': 0.3936893583416159, 'time_step': 0.0037368307279986007, 'init_value': -16.387653350830078, 'ave_value': -25.827394286922374, 'soft_opc': nan} step=17200




2022-04-20 16:58.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165651/model_17200.pt
search iteration:  18
using hyper params:  [0.004568050772321046, 0.007584061030152684, 5.728405795573056e-05, 3]
2022-04-20 16:58.02 [debug    ] RoundIterator is selected.
2022-04-20 16:58.02 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420165802
2022-04-20 16:58.02 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 16:58.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:58.02 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:58.02 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00456805077232

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.05 [info     ] TD3PlusBC_20220420165802: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00035035540485939785, 'time_algorithm_update': 0.006800118245576557, 'critic_loss': 2.556987896910188, 'actor_loss': 2.4194308986440736, 'time_step': 0.0072305948413603485, 'td_error': 0.8310081249332526, 'init_value': -4.498513698577881, 'ave_value': -2.5513542812164696} step=342
2022-04-20 16:58.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.08 [info     ] TD3PlusBC_20220420165802: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003535796327200549, 'time_algorithm_update': 0.006999455000224866, 'critic_loss': 1.3330703829987007, 'actor_loss': 2.3135605667069643, 'time_step': 0.007427633854380825, 'td_error': 0.864335941236528, 'init_value': -6.4222846031188965, 'ave_value': -3.6875779255685974} step=684
2022-04-20 16:58.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.10 [info     ] TD3PlusBC_20220420165802: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.000344270154049522, 'time_algorithm_update': 0.006703161356741922, 'critic_loss': 1.906381156360894, 'actor_loss': 2.30198147422389, 'time_step': 0.007121782553823371, 'td_error': 0.9340013110721785, 'init_value': -8.476223945617676, 'ave_value': -4.901921794776309} step=1026
2022-04-20 16:58.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.13 [info     ] TD3PlusBC_20220420165802: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003159596906070821, 'time_algorithm_update': 0.006240778499179416, 'critic_loss': 2.6370226517755384, 'actor_loss': 2.2993398563206546, 'time_step': 0.006623944343879209, 'td_error': 1.0296020649648578, 'init_value': -10.516288757324219, 'ave_value': -5.912437509157745} step=1368
2022-04-20 16:58.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.16 [info     ] TD3PlusBC_20220420165802: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00030774261519225717, 'time_algorithm_update': 0.0061111234084904546, 'critic_loss': 3.4909656541040768, 'actor_loss': 2.2945090790241087, 'time_step': 0.006483839966400325, 'td_error': 1.1356077036484427, 'init_value': -12.509763717651367, 'ave_value': -7.1324792983325045} step=1710
2022-04-20 16:58.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.19 [info     ] TD3PlusBC_20220420165802: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003476421735440081, 'time_algorithm_update': 0.006793250814515945, 'critic_loss': 4.40575825610356, 'actor_loss': 2.294893295444243, 'time_step': 0.007217306839792352, 'td_error': 1.2706597954456247, 'init_value': -15.092676162719727, 'ave_value': -8.47241648869484} step=2052
2022-04-20 16:58.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.22 [info     ] TD3PlusBC_20220420165802: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003502034304434793, 'time_algorithm_update': 0.006859187494244492, 'critic_loss': 5.258849766519335, 'actor_loss': 2.291166062940631, 'time_step': 0.007282759013928865, 'td_error': 1.4316276294696633, 'init_value': -17.0541934967041, 'ave_value': -9.699452352467336} step=2394
2022-04-20 16:58.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.24 [info     ] TD3PlusBC_20220420165802: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00035400627649318406, 'time_algorithm_update': 0.0069177311066298455, 'critic_loss': 6.29900620625033, 'actor_loss': 2.2932354776482833, 'time_step': 0.007348358282568859, 'td_error': 1.6007239192553648, 'init_value': -19.105579376220703, 'ave_value': -10.819796556985795} step=2736
2022-04-20 16:58.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.27 [info     ] TD3PlusBC_20220420165802: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00035316135451110483, 'time_algorithm_update': 0.0069857675429673225, 'critic_loss': 7.435058278647083, 'actor_loss': 2.2961766413080764, 'time_step': 0.007419395167925205, 'td_error': 1.7960669735694335, 'init_value': -20.848705291748047, 'ave_value': -11.8674769544609} step=3078
2022-04-20 16:58.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.31 [info     ] TD3PlusBC_20220420165802: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035725420678568167, 'time_algorithm_update': 0.008534680333053856, 'critic_loss': 8.869070852011966, 'actor_loss': 2.295373052184345, 'time_step': 0.008973304291217649, 'td_error': 1.9190117093292418, 'init_value': -23.31853675842285, 'ave_value': -13.090894702332438} step=3420
2022-04-20 16:58.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.35 [info     ] TD3PlusBC_20220420165802: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003578105168035853, 'time_algorithm_update': 0.008999438313712851, 'critic_loss': 10.499346993820012, 'actor_loss': 2.292428331765515, 'time_step': 0.009431119550738418, 'td_error': 2.10473914097146, 'init_value': -25.111513137817383, 'ave_value': -14.270998543627888} step=3762
2022-04-20 16:58.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.38 [info     ] TD3PlusBC_20220420165802: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00035319272537677606, 'time_algorithm_update': 0.008506903174327828, 'critic_loss': 11.95569682400129, 'actor_loss': 2.300244919737877, 'time_step': 0.00893129173078035, 'td_error': 2.330936006941449, 'init_value': -27.08270263671875, 'ave_value': -15.371892587805025} step=4104
2022-04-20 16:58.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.42 [info     ] TD3PlusBC_20220420165802: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035732252555981017, 'time_algorithm_update': 0.008942340549669768, 'critic_loss': 13.641127279627392, 'actor_loss': 2.2961904082381936, 'time_step': 0.009371399182325218, 'td_error': 2.5511991761774113, 'init_value': -29.097997665405273, 'ave_value': -16.36833127566542} step=4446
2022-04-20 16:58.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.46 [info     ] TD3PlusBC_20220420165802: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035917828654685215, 'time_algorithm_update': 0.009076960602698967, 'critic_loss': 15.518184516862123, 'actor_loss': 2.295805823733235, 'time_step': 0.0095126796187016, 'td_error': 2.595102124322533, 'init_value': -30.898473739624023, 'ave_value': -17.41334790338913} step=4788
2022-04-20 16:58.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.49 [info     ] TD3PlusBC_20220420165802: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003573838730304562, 'time_algorithm_update': 0.008493648635016548, 'critic_loss': 17.293837923752633, 'actor_loss': 2.2964036394978127, 'time_step': 0.008928756267703765, 'td_error': 2.826068382649642, 'init_value': -32.69045639038086, 'ave_value': -18.35880058273782} step=5130
2022-04-20 16:58.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.53 [info     ] TD3PlusBC_20220420165802: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003567892208433988, 'time_algorithm_update': 0.008965512465315255, 'critic_loss': 19.724277203543146, 'actor_loss': 2.300982257776093, 'time_step': 0.009396205171507004, 'td_error': 3.114942314632431, 'init_value': -35.23564910888672, 'ave_value': -19.59335634628101} step=5472
2022-04-20 16:58.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.57 [info     ] TD3PlusBC_20220420165802: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003603585282264397, 'time_algorithm_update': 0.008633380744889466, 'critic_loss': 22.041234312001723, 'actor_loss': 2.297611688312731, 'time_step': 0.00906770062028316, 'td_error': 3.276591817739752, 'init_value': -36.700706481933594, 'ave_value': -20.44064796345229} step=5814
2022-04-20 16:58.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.01 [info     ] TD3PlusBC_20220420165802: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00036082699982046385, 'time_algorithm_update': 0.009076807234022353, 'critic_loss': 24.372110040564287, 'actor_loss': 2.3000291648663973, 'time_step': 0.00951325335697821, 'td_error': 3.5553697711498375, 'init_value': -38.2161750793457, 'ave_value': -21.385203894149463} step=6156
2022-04-20 16:59.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.04 [info     ] TD3PlusBC_20220420165802: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003206109442906073, 'time_algorithm_update': 0.00846478743859899, 'critic_loss': 26.855906009674072, 'actor_loss': 2.300391590386106, 'time_step': 0.008857450290032995, 'td_error': 3.761744774496645, 'init_value': -39.78782272338867, 'ave_value': -22.354296196011223} step=6498
2022-04-20 16:59.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.08 [info     ] TD3PlusBC_20220420165802: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003334960045173154, 'time_algorithm_update': 0.008195140208417212, 'critic_loss': 29.501805542505277, 'actor_loss': 2.299798594580756, 'time_step': 0.008597434612742642, 'td_error': 4.154553298639946, 'init_value': -42.45106887817383, 'ave_value': -23.47005460394882} step=6840
2022-04-20 16:59.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.11 [info     ] TD3PlusBC_20220420165802: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003602888151916147, 'time_algorithm_update': 0.009020729371678759, 'critic_loss': 31.93688600105152, 'actor_loss': 2.297516223282842, 'time_step': 0.009457957674885354, 'td_error': 4.458007837735035, 'init_value': -43.79163360595703, 'ave_value': -24.2331460538384} step=7182
2022-04-20 16:59.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.15 [info     ] TD3PlusBC_20220420165802: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00035942855634187395, 'time_algorithm_update': 0.00862078917653937, 'critic_loss': 34.76412923712479, 'actor_loss': 2.2972097508391442, 'time_step': 0.00905914752804048, 'td_error': 4.454266735720288, 'init_value': -44.39457321166992, 'ave_value': -24.953846943560436} step=7524
2022-04-20 16:59.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.19 [info     ] TD3PlusBC_20220420165802: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003603975675259417, 'time_algorithm_update': 0.00885042879316542, 'critic_loss': 37.68966169524611, 'actor_loss': 2.3013378207446538, 'time_step': 0.009281621341816863, 'td_error': 4.669741149536884, 'init_value': -45.40932083129883, 'ave_value': -25.6604292838283} step=7866
2022-04-20 16:59.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.23 [info     ] TD3PlusBC_20220420165802: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00036155062112194753, 'time_algorithm_update': 0.008973318930954962, 'critic_loss': 40.50775603244179, 'actor_loss': 2.2981690286892897, 'time_step': 0.009412278905946609, 'td_error': 4.731615667924611, 'init_value': -46.363441467285156, 'ave_value': -26.491516030891255} step=8208
2022-04-20 16:59.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.26 [info     ] TD3PlusBC_20220420165802: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003553154872871979, 'time_algorithm_update': 0.008600895167791356, 'critic_loss': 43.39740318164491, 'actor_loss': 2.2961611775626913, 'time_step': 0.009030695547137344, 'td_error': 5.007474607711439, 'init_value': -48.01676559448242, 'ave_value': -27.31965815087747} step=8550
2022-04-20 16:59.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.30 [info     ] TD3PlusBC_20220420165802: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003552004607797366, 'time_algorithm_update': 0.008914201580292998, 'critic_loss': 46.27154879542122, 'actor_loss': 2.302137289828027, 'time_step': 0.009341735588876824, 'td_error': 5.233004059309359, 'init_value': -49.24708938598633, 'ave_value': -27.970181366924347} step=8892
2022-04-20 16:59.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.33 [info     ] TD3PlusBC_20220420165802: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00036029648362544543, 'time_algorithm_update': 0.008827001727812471, 'critic_loss': 49.33768064376206, 'actor_loss': 2.2997827334710728, 'time_step': 0.00926737548315037, 'td_error': 5.119815310415826, 'init_value': -49.06520462036133, 'ave_value': -28.464376840762508} step=9234
2022-04-20 16:59.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.37 [info     ] TD3PlusBC_20220420165802: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036023931893688893, 'time_algorithm_update': 0.008932354157431084, 'critic_loss': 52.39286037355836, 'actor_loss': 2.299350795690079, 'time_step': 0.009363254608466612, 'td_error': 5.544894196241923, 'init_value': -51.51903533935547, 'ave_value': -29.529753085138562} step=9576
2022-04-20 16:59.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.41 [info     ] TD3PlusBC_20220420165802: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003583758895160162, 'time_algorithm_update': 0.008939416087858858, 'critic_loss': 54.90636253914638, 'actor_loss': 2.304025860557779, 'time_step': 0.009374853463200798, 'td_error': 5.584151651443897, 'init_value': -51.164222717285156, 'ave_value': -30.009919648661807} step=9918
2022-04-20 16:59.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.45 [info     ] TD3PlusBC_20220420165802: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003619417112473159, 'time_algorithm_update': 0.008587122660631325, 'critic_loss': 57.94599182842768, 'actor_loss': 2.3015166263134157, 'time_step': 0.00902527954146179, 'td_error': 5.959349890277202, 'init_value': -53.541481018066406, 'ave_value': -30.924663648104364} step=10260
2022-04-20 16:59.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.48 [info     ] TD3PlusBC_20220420165802: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003617005041468213, 'time_algorithm_update': 0.008984749080144872, 'critic_loss': 61.05641566382514, 'actor_loss': 2.2971812019571227, 'time_step': 0.00942556481612356, 'td_error': 5.970980937851971, 'init_value': -53.93292236328125, 'ave_value': -31.464028725498398} step=10602
2022-04-20 16:59.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.52 [info     ] TD3PlusBC_20220420165802: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003555706369946575, 'time_algorithm_update': 0.008886458580954033, 'critic_loss': 63.549286044829074, 'actor_loss': 2.2992723322751227, 'time_step': 0.009320880237378572, 'td_error': 6.018325644982247, 'init_value': -55.07365798950195, 'ave_value': -32.280917970524634} step=10944
2022-04-20 16:59.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.56 [info     ] TD3PlusBC_20220420165802: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003566072698225055, 'time_algorithm_update': 0.008816475059553894, 'critic_loss': 66.33894619188811, 'actor_loss': 2.300527654893217, 'time_step': 0.009247170554267036, 'td_error': 6.234682116213073, 'init_value': -55.65421676635742, 'ave_value': -32.64118997897483} step=11286
2022-04-20 16:59.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.59 [info     ] TD3PlusBC_20220420165802: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003533251801429436, 'time_algorithm_update': 0.008958550224527281, 'critic_loss': 68.8479311647471, 'actor_loss': 2.300128429256685, 'time_step': 0.009386659365648415, 'td_error': 6.170043214556563, 'init_value': -55.5517578125, 'ave_value': -33.2335128591255} step=11628
2022-04-20 16:59.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.03 [info     ] TD3PlusBC_20220420165802: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035602865163345785, 'time_algorithm_update': 0.008406922831172831, 'critic_loss': 71.5774613096003, 'actor_loss': 2.29840628445497, 'time_step': 0.00883849671012477, 'td_error': 6.579999282774682, 'init_value': -57.17665481567383, 'ave_value': -33.960829726034724} step=11970
2022-04-20 17:00.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.07 [info     ] TD3PlusBC_20220420165802: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003696226934243364, 'time_algorithm_update': 0.009137477791100218, 'critic_loss': 74.13285572208159, 'actor_loss': 2.2972247670268455, 'time_step': 0.009586755992376317, 'td_error': 6.817502783074222, 'init_value': -57.6850471496582, 'ave_value': -34.53340302060912} step=12312
2022-04-20 17:00.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.11 [info     ] TD3PlusBC_20220420165802: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003563270234225089, 'time_algorithm_update': 0.009019961831165336, 'critic_loss': 76.39810891179313, 'actor_loss': 2.3005790863817897, 'time_step': 0.009453254833556059, 'td_error': 6.557337316969742, 'init_value': -57.80192184448242, 'ave_value': -34.84463766306052} step=12654
2022-04-20 17:00.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.14 [info     ] TD3PlusBC_20220420165802: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003516827410424662, 'time_algorithm_update': 0.008551777454844693, 'critic_loss': 78.59330557661447, 'actor_loss': 2.2985844138072946, 'time_step': 0.008979608440956873, 'td_error': 6.723580221563323, 'init_value': -59.09044647216797, 'ave_value': -35.51951411607734} step=12996
2022-04-20 17:00.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.18 [info     ] TD3PlusBC_20220420165802: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035912530464038514, 'time_algorithm_update': 0.009019561678345441, 'critic_loss': 80.624745039912, 'actor_loss': 2.298323118198685, 'time_step': 0.009453428419012772, 'td_error': 7.0738573764230495, 'init_value': -59.789451599121094, 'ave_value': -35.99921850806857} step=13338
2022-04-20 17:00.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.22 [info     ] TD3PlusBC_20220420165802: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003617520917925918, 'time_algorithm_update': 0.008525947381181327, 'critic_loss': 82.8460693359375, 'actor_loss': 2.2978149199346354, 'time_step': 0.008967951027273435, 'td_error': 6.853864031809234, 'init_value': -59.434608459472656, 'ave_value': -36.58455838804419} step=13680
2022-04-20 17:00.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.25 [info     ] TD3PlusBC_20220420165802: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035556993986430923, 'time_algorithm_update': 0.008930552075480857, 'critic_loss': 84.58824959693597, 'actor_loss': 2.3018219373379534, 'time_step': 0.009360766550253707, 'td_error': 7.152005865608285, 'init_value': -61.71461868286133, 'ave_value': -37.04781816633678} step=14022
2022-04-20 17:00.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.29 [info     ] TD3PlusBC_20220420165802: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003566323665150425, 'time_algorithm_update': 0.009054429349843521, 'critic_loss': 86.55178355612951, 'actor_loss': 2.2983215078275805, 'time_step': 0.00949266779492473, 'td_error': 7.175268216897512, 'init_value': -61.35187911987305, 'ave_value': -37.51438889514571} step=14364
2022-04-20 17:00.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.33 [info     ] TD3PlusBC_20220420165802: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035213239011708757, 'time_algorithm_update': 0.008428892894097937, 'critic_loss': 88.58269683659425, 'actor_loss': 2.2971372367345797, 'time_step': 0.008861863822267767, 'td_error': 7.155781185500176, 'init_value': -61.3717041015625, 'ave_value': -37.843392319390134} step=14706
2022-04-20 17:00.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.36 [info     ] TD3PlusBC_20220420165802: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00035770315872995476, 'time_algorithm_update': 0.008890176377101251, 'critic_loss': 90.60216205440767, 'actor_loss': 2.298202746095713, 'time_step': 0.0093259490721407, 'td_error': 7.3781015303537165, 'init_value': -62.95146560668945, 'ave_value': -38.36589735316297} step=15048
2022-04-20 17:00.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.40 [info     ] TD3PlusBC_20220420165802: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003583981976871602, 'time_algorithm_update': 0.008590876010426303, 'critic_loss': 92.11059315999348, 'actor_loss': 2.3019761830045464, 'time_step': 0.009026399829931427, 'td_error': 7.273613966887403, 'init_value': -61.85075759887695, 'ave_value': -38.53733706763803} step=15390
2022-04-20 17:00.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.44 [info     ] TD3PlusBC_20220420165802: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035956589102047925, 'time_algorithm_update': 0.008950454449793052, 'critic_loss': 93.77223370646873, 'actor_loss': 2.301264941343787, 'time_step': 0.009387990187483224, 'td_error': 7.634590252872563, 'init_value': -62.9677848815918, 'ave_value': -39.26111646669825} step=15732
2022-04-20 17:00.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.47 [info     ] TD3PlusBC_20220420165802: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003558697059140568, 'time_algorithm_update': 0.008875299615469592, 'critic_loss': 95.42645689757944, 'actor_loss': 2.3009063723491647, 'time_step': 0.009307507185908089, 'td_error': 7.633743286719964, 'init_value': -64.24372100830078, 'ave_value': -39.69384721085194} step=16074
2022-04-20 17:00.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.51 [info     ] TD3PlusBC_20220420165802: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003519037313628615, 'time_algorithm_update': 0.00846343221720199, 'critic_loss': 97.04646589044938, 'actor_loss': 2.3018778597402294, 'time_step': 0.008891194187409697, 'td_error': 8.014767712209425, 'init_value': -65.23680877685547, 'ave_value': -40.26651992540219} step=16416
2022-04-20 17:00.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.55 [info     ] TD3PlusBC_20220420165802: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003593051642702337, 'time_algorithm_update': 0.009105434194642898, 'critic_loss': 98.56511969315379, 'actor_loss': 2.300649536980523, 'time_step': 0.009543570161562914, 'td_error': 8.211052208481007, 'init_value': -65.76835632324219, 'ave_value': -40.634185437720255} step=16758
2022-04-20 17:00.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.58 [info     ] TD3PlusBC_20220420165802: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003554200568394354, 'time_algorithm_update': 0.008618361768666764, 'critic_loss': 100.04187741195946, 'actor_loss': 2.2983754991787917, 'time_step': 0.009048710789596825, 'td_error': 8.220845322784898, 'init_value': -65.3086166381836, 'ave_value': -40.99552967305387} step=17100
2022-04-20 17:00.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420165802/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:01.00 [info     ] FQE_20220420170059: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001674603266888354, 'time_algorithm_update': 0.005064363939216338, 'loss': 0.005070656062530197, 'time_step': 0.005310330046228616, 'init_value': -0.5222164988517761, 'ave_value': -0.4739121270139475, 'soft_opc': nan} step=166




2022-04-20 17:01.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.01 [info     ] FQE_20220420170059: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016388404800231197, 'time_algorithm_update': 0.005040441650942147, 'loss': 0.0037109925655692995, 'time_step': 0.005275605672813323, 'init_value': -0.6590681076049805, 'ave_value': -0.5437643432469519, 'soft_opc': nan} step=332




2022-04-20 17:01.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.01 [info     ] FQE_20220420170059: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.000166292650153838, 'time_algorithm_update': 0.00502159652939762, 'loss': 0.00342318614950426, 'time_step': 0.005261669675987887, 'init_value': -0.7338640689849854, 'ave_value': -0.5823267410870071, 'soft_opc': nan} step=498




2022-04-20 17:01.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.02 [info     ] FQE_20220420170059: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016502443566379776, 'time_algorithm_update': 0.005065922277519502, 'loss': 0.0034688566131124564, 'time_step': 0.005301657929477921, 'init_value': -0.8276270031929016, 'ave_value': -0.6136261069861887, 'soft_opc': nan} step=664




2022-04-20 17:01.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.03 [info     ] FQE_20220420170059: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001619206853659756, 'time_algorithm_update': 0.00501723030963576, 'loss': 0.0033660812165969647, 'time_step': 0.0052525595010045065, 'init_value': -0.9129549264907837, 'ave_value': -0.6651522627821914, 'soft_opc': nan} step=830




2022-04-20 17:01.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.04 [info     ] FQE_20220420170059: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001642359308449619, 'time_algorithm_update': 0.00504406245358019, 'loss': 0.003322711980239634, 'time_step': 0.005283059844051499, 'init_value': -0.9170459508895874, 'ave_value': -0.6342443985713495, 'soft_opc': nan} step=996




2022-04-20 17:01.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.05 [info     ] FQE_20220420170059: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001605806580509048, 'time_algorithm_update': 0.005010537354342909, 'loss': 0.003342617873287569, 'time_step': 0.005246543022523443, 'init_value': -0.9979066252708435, 'ave_value': -0.680295640236891, 'soft_opc': nan} step=1162




2022-04-20 17:01.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.06 [info     ] FQE_20220420170059: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00017212097903332078, 'time_algorithm_update': 0.005045106612056135, 'loss': 0.0033642945057959354, 'time_step': 0.005291986178202802, 'init_value': -1.0543103218078613, 'ave_value': -0.699243533879787, 'soft_opc': nan} step=1328




2022-04-20 17:01.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.07 [info     ] FQE_20220420170059: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016096844730606997, 'time_algorithm_update': 0.004347274102360369, 'loss': 0.00325276212505884, 'time_step': 0.004580589662115258, 'init_value': -1.091235637664795, 'ave_value': -0.706421305850014, 'soft_opc': nan} step=1494




2022-04-20 17:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.08 [info     ] FQE_20220420170059: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016110776418662933, 'time_algorithm_update': 0.004994508731796081, 'loss': 0.0032914454574663058, 'time_step': 0.005231946347707726, 'init_value': -1.1680939197540283, 'ave_value': -0.756376606185694, 'soft_opc': nan} step=1660




2022-04-20 17:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.09 [info     ] FQE_20220420170059: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001669260392706078, 'time_algorithm_update': 0.005056338137890919, 'loss': 0.003268071238501334, 'time_step': 0.005296062274151538, 'init_value': -1.223797082901001, 'ave_value': -0.7704587975533695, 'soft_opc': nan} step=1826




2022-04-20 17:01.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.10 [info     ] FQE_20220420170059: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016291888363390085, 'time_algorithm_update': 0.0049388164497283565, 'loss': 0.0032397171213145717, 'time_step': 0.00517871580928205, 'init_value': -1.2760565280914307, 'ave_value': -0.8119214760156365, 'soft_opc': nan} step=1992




2022-04-20 17:01.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.11 [info     ] FQE_20220420170059: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001663414828748588, 'time_algorithm_update': 0.005061724099768214, 'loss': 0.003339036235989756, 'time_step': 0.005306259695305882, 'init_value': -1.3178820610046387, 'ave_value': -0.8195979511751248, 'soft_opc': nan} step=2158




2022-04-20 17:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.12 [info     ] FQE_20220420170059: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00017085276454328056, 'time_algorithm_update': 0.005048638366791139, 'loss': 0.0033502554849171675, 'time_step': 0.005295678793665874, 'init_value': -1.432273507118225, 'ave_value': -0.8943089897299672, 'soft_opc': nan} step=2324




2022-04-20 17:01.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.13 [info     ] FQE_20220420170059: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016535333840243788, 'time_algorithm_update': 0.0051292051751929595, 'loss': 0.003377792225583984, 'time_step': 0.005367185696061835, 'init_value': -1.4983439445495605, 'ave_value': -0.9349081557344746, 'soft_opc': nan} step=2490




2022-04-20 17:01.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.14 [info     ] FQE_20220420170059: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016256412827824973, 'time_algorithm_update': 0.00504418022661324, 'loss': 0.003552266862944711, 'time_step': 0.0052855675479015676, 'init_value': -1.5636651515960693, 'ave_value': -0.9690550314682024, 'soft_opc': nan} step=2656




2022-04-20 17:01.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.15 [info     ] FQE_20220420170059: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016921255961958184, 'time_algorithm_update': 0.005062943481537233, 'loss': 0.003551875147126974, 'time_step': 0.005300470145351915, 'init_value': -1.6234865188598633, 'ave_value': -0.9982942524189885, 'soft_opc': nan} step=2822




2022-04-20 17:01.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.16 [info     ] FQE_20220420170059: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001617109919168863, 'time_algorithm_update': 0.004152331007532327, 'loss': 0.0035907786577261134, 'time_step': 0.0043838440653789475, 'init_value': -1.6932921409606934, 'ave_value': -1.0609091996341138, 'soft_opc': nan} step=2988




2022-04-20 17:01.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.17 [info     ] FQE_20220420170059: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001639587333403438, 'time_algorithm_update': 0.005019103188112557, 'loss': 0.0038604942979347185, 'time_step': 0.00525529126086867, 'init_value': -1.8325819969177246, 'ave_value': -1.1376118696286335, 'soft_opc': nan} step=3154




2022-04-20 17:01.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.18 [info     ] FQE_20220420170059: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016629695892333984, 'time_algorithm_update': 0.0050900054265217605, 'loss': 0.0040380890843744605, 'time_step': 0.005331052355019443, 'init_value': -1.9176619052886963, 'ave_value': -1.1939613500559652, 'soft_opc': nan} step=3320




2022-04-20 17:01.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.19 [info     ] FQE_20220420170059: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016436663018651754, 'time_algorithm_update': 0.005139162741511701, 'loss': 0.004291018182888672, 'time_step': 0.00538355902016881, 'init_value': -2.0060582160949707, 'ave_value': -1.2578337029897952, 'soft_opc': nan} step=3486




2022-04-20 17:01.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.20 [info     ] FQE_20220420170059: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001657468726836055, 'time_algorithm_update': 0.005076230290424393, 'loss': 0.004459166910544217, 'time_step': 0.005316009004432035, 'init_value': -2.0727596282958984, 'ave_value': -1.289321455876301, 'soft_opc': nan} step=3652




2022-04-20 17:01.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.20 [info     ] FQE_20220420170059: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016837809459272637, 'time_algorithm_update': 0.005092817616749959, 'loss': 0.004650175995726408, 'time_step': 0.005334921630032091, 'init_value': -2.2017335891723633, 'ave_value': -1.3546972827868418, 'soft_opc': nan} step=3818




2022-04-20 17:01.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.21 [info     ] FQE_20220420170059: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016517093382686018, 'time_algorithm_update': 0.005074094577007983, 'loss': 0.004907381067232016, 'time_step': 0.005316576325749776, 'init_value': -2.271921157836914, 'ave_value': -1.4133614824765974, 'soft_opc': nan} step=3984




2022-04-20 17:01.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.22 [info     ] FQE_20220420170059: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001644815307065665, 'time_algorithm_update': 0.005002330584698413, 'loss': 0.0052462830036424145, 'time_step': 0.005240719002413462, 'init_value': -2.395353317260742, 'ave_value': -1.5147222619671543, 'soft_opc': nan} step=4150




2022-04-20 17:01.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.23 [info     ] FQE_20220420170059: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016519391393086998, 'time_algorithm_update': 0.005029991448643696, 'loss': 0.005316788577530758, 'time_step': 0.005269016127988517, 'init_value': -2.497687339782715, 'ave_value': -1.5528146273985102, 'soft_opc': nan} step=4316




2022-04-20 17:01.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.24 [info     ] FQE_20220420170059: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016451456460608057, 'time_algorithm_update': 0.004323427935680711, 'loss': 0.0056580974827716915, 'time_step': 0.004561842206012772, 'init_value': -2.5804924964904785, 'ave_value': -1.6219428514105243, 'soft_opc': nan} step=4482




2022-04-20 17:01.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.25 [info     ] FQE_20220420170059: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016625961625432395, 'time_algorithm_update': 0.005035548325044563, 'loss': 0.006025879538312262, 'time_step': 0.005278855921274208, 'init_value': -2.699249744415283, 'ave_value': -1.7367299527735323, 'soft_opc': nan} step=4648




2022-04-20 17:01.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.26 [info     ] FQE_20220420170059: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016501007309879166, 'time_algorithm_update': 0.005129440721259059, 'loss': 0.006396414249916617, 'time_step': 0.005369940436030009, 'init_value': -2.797311305999756, 'ave_value': -1.781657304433552, 'soft_opc': nan} step=4814




2022-04-20 17:01.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.27 [info     ] FQE_20220420170059: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016576410776161286, 'time_algorithm_update': 0.005095905568226275, 'loss': 0.006797602142106338, 'time_step': 0.0053353137280567585, 'init_value': -2.8522286415100098, 'ave_value': -1.8595046363368228, 'soft_opc': nan} step=4980




2022-04-20 17:01.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.28 [info     ] FQE_20220420170059: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016497416668627635, 'time_algorithm_update': 0.005059138838067113, 'loss': 0.007101664344179837, 'time_step': 0.005295568201915327, 'init_value': -2.9643077850341797, 'ave_value': -1.9041612957109202, 'soft_opc': nan} step=5146




2022-04-20 17:01.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.29 [info     ] FQE_20220420170059: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001692685736231057, 'time_algorithm_update': 0.005096691200532109, 'loss': 0.007157789925127347, 'time_step': 0.005343266280300646, 'init_value': -3.0698702335357666, 'ave_value': -2.0277693527910086, 'soft_opc': nan} step=5312




2022-04-20 17:01.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.30 [info     ] FQE_20220420170059: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016718312918421733, 'time_algorithm_update': 0.005017330847590803, 'loss': 0.007629492972326656, 'time_step': 0.005259291235222874, 'init_value': -3.1472301483154297, 'ave_value': -2.062792848876199, 'soft_opc': nan} step=5478




2022-04-20 17:01.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.31 [info     ] FQE_20220420170059: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016667325812650015, 'time_algorithm_update': 0.005076213055346386, 'loss': 0.007957921161089116, 'time_step': 0.00531773394848927, 'init_value': -3.211609125137329, 'ave_value': -2.1046993475440923, 'soft_opc': nan} step=5644




2022-04-20 17:01.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.32 [info     ] FQE_20220420170059: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016531455947692134, 'time_algorithm_update': 0.0050797677901853995, 'loss': 0.00840135865440839, 'time_step': 0.005321525665650885, 'init_value': -3.3771777153015137, 'ave_value': -2.2146559398722006, 'soft_opc': nan} step=5810




2022-04-20 17:01.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.33 [info     ] FQE_20220420170059: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016099286366658038, 'time_algorithm_update': 0.004867491951907973, 'loss': 0.008914121639836266, 'time_step': 0.005102013967123376, 'init_value': -3.3603427410125732, 'ave_value': -2.1587805628105325, 'soft_opc': nan} step=5976




2022-04-20 17:01.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.34 [info     ] FQE_20220420170059: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016389122928481503, 'time_algorithm_update': 0.0044895252549504655, 'loss': 0.009374375909521997, 'time_step': 0.0047273377338087706, 'init_value': -3.5184106826782227, 'ave_value': -2.3014487323057544, 'soft_opc': nan} step=6142




2022-04-20 17:01.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.35 [info     ] FQE_20220420170059: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016584453812564713, 'time_algorithm_update': 0.005217723099582167, 'loss': 0.009413595784017929, 'time_step': 0.005456756396465991, 'init_value': -3.5323331356048584, 'ave_value': -2.3035876965133455, 'soft_opc': nan} step=6308




2022-04-20 17:01.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.36 [info     ] FQE_20220420170059: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001690359000700066, 'time_algorithm_update': 0.0050085337765245555, 'loss': 0.009661496654770011, 'time_step': 0.005254389291786286, 'init_value': -3.596740245819092, 'ave_value': -2.3397138221910945, 'soft_opc': nan} step=6474




2022-04-20 17:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.37 [info     ] FQE_20220420170059: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016881328031241176, 'time_algorithm_update': 0.005044833723321019, 'loss': 0.009923641971755683, 'time_step': 0.005289533052099757, 'init_value': -3.723329782485962, 'ave_value': -2.417825279975528, 'soft_opc': nan} step=6640




2022-04-20 17:01.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.38 [info     ] FQE_20220420170059: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016774470547595657, 'time_algorithm_update': 0.005097930689892137, 'loss': 0.010432655882393291, 'time_step': 0.0053408346980451105, 'init_value': -3.8470020294189453, 'ave_value': -2.491738397050817, 'soft_opc': nan} step=6806




2022-04-20 17:01.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.39 [info     ] FQE_20220420170059: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001649411327867623, 'time_algorithm_update': 0.004966896700571819, 'loss': 0.011038531463716673, 'time_step': 0.005208426211253706, 'init_value': -3.899160385131836, 'ave_value': -2.5441243081345215, 'soft_opc': nan} step=6972




2022-04-20 17:01.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.40 [info     ] FQE_20220420170059: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016708833625517697, 'time_algorithm_update': 0.0051476079297352985, 'loss': 0.011333942949121078, 'time_step': 0.005386389881731516, 'init_value': -4.0299224853515625, 'ave_value': -2.628220859022291, 'soft_opc': nan} step=7138




2022-04-20 17:01.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.41 [info     ] FQE_20220420170059: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001633756132010954, 'time_algorithm_update': 0.004992637289575784, 'loss': 0.011795794497333825, 'time_step': 0.005231162151658392, 'init_value': -4.126111030578613, 'ave_value': -2.706306356701765, 'soft_opc': nan} step=7304




2022-04-20 17:01.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.41 [info     ] FQE_20220420170059: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016539786235395684, 'time_algorithm_update': 0.005123832139624171, 'loss': 0.012500130349815071, 'time_step': 0.005363381052591714, 'init_value': -4.193600654602051, 'ave_value': -2.740000058918654, 'soft_opc': nan} step=7470




2022-04-20 17:01.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.42 [info     ] FQE_20220420170059: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001629892602024308, 'time_algorithm_update': 0.004217792706317212, 'loss': 0.012963883727452302, 'time_step': 0.004455873765141131, 'init_value': -4.29976749420166, 'ave_value': -2.8183112239918193, 'soft_opc': nan} step=7636




2022-04-20 17:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.43 [info     ] FQE_20220420170059: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017022081168301134, 'time_algorithm_update': 0.005165650183895984, 'loss': 0.01370368459323668, 'time_step': 0.005411642143525273, 'init_value': -4.379614353179932, 'ave_value': -2.8755557846512882, 'soft_opc': nan} step=7802




2022-04-20 17:01.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.44 [info     ] FQE_20220420170059: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016796588897705078, 'time_algorithm_update': 0.005068027829549399, 'loss': 0.013762510080081242, 'time_step': 0.005309869007891919, 'init_value': -4.4061384201049805, 'ave_value': -2.890360376326082, 'soft_opc': nan} step=7968




2022-04-20 17:01.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.45 [info     ] FQE_20220420170059: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016752639448786355, 'time_algorithm_update': 0.0049949238099247575, 'loss': 0.014035687557455287, 'time_step': 0.005236805203449295, 'init_value': -4.494801044464111, 'ave_value': -2.9489884532585338, 'soft_opc': nan} step=8134




2022-04-20 17:01.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:01.46 [info     ] FQE_20220420170059: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016838814838823066, 'time_algorithm_update': 0.005087130041007536, 'loss': 0.014581730945529529, 'time_step': 0.005331733140600733, 'init_value': -4.558074951171875, 'ave_value': -3.013161750204928, 'soft_opc': nan} step=8300




2022-04-20 17:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170059/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:01.46 [info     ] Directory is created at d3rlpy_logs/FQE_20220420170146
2022-04-20 17:01.46 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:01.46 [debug    ] Building models...
2022-04-20 17:01.46 [debug    ] Models have been built.
2022-04-20 17:01.46 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420170146/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:01.48 [info     ] FQE_20220420170146: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001643979272177053, 'time_algorithm_update': 0.005045583081799884, 'loss': 0.024736151908697605, 'time_step': 0.005284484042677768, 'init_value': -1.1068804264068604, 'ave_value': -1.0891031779751585, 'soft_opc': nan} step=344




2022-04-20 17:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:01.50 [info     ] FQE_20220420170146: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001675223195275595, 'time_algorithm_update': 0.004958076532496963, 'loss': 0.022231008039874045, 'time_step': 0.005199404649956282, 'init_value': -1.903214931488037, 'ave_value': -1.8656248980277292, 'soft_opc': nan} step=688




2022-04-20 17:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:01.52 [info     ] FQE_20220420170146: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017211187717526457, 'time_algorithm_update': 0.0047543721143589466, 'loss': 0.022929577790417297, 'time_step': 0.0050039395343425665, 'init_value': -2.9632139205932617, 'ave_value': -2.904354930890573, 'soft_opc': nan} step=1032




2022-04-20 17:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:01.54 [info     ] FQE_20220420170146: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017122057981269303, 'time_algorithm_update': 0.0050366645635560505, 'loss': 0.02469267818242918, 'time_step': 0.005283857500830362, 'init_value': -3.7595810890197754, 'ave_value': -3.724263200506165, 'soft_opc': nan} step=1376




2022-04-20 17:01.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:01.56 [info     ] FQE_20220420170146: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001710355281829834, 'time_algorithm_update': 0.0050670359023781706, 'loss': 0.029328096731089402, 'time_step': 0.00531542231870252, 'init_value': -4.741398334503174, 'ave_value': -4.763031945364164, 'soft_opc': nan} step=1720




2022-04-20 17:01.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:01.58 [info     ] FQE_20220420170146: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017214029334312262, 'time_algorithm_update': 0.005095112462376439, 'loss': 0.03516865276114279, 'time_step': 0.005346314851627793, 'init_value': -5.498115539550781, 'ave_value': -5.659129432957989, 'soft_opc': nan} step=2064




2022-04-20 17:01.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.00 [info     ] FQE_20220420170146: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001689188702161922, 'time_algorithm_update': 0.004532304614089256, 'loss': 0.044049277731087494, 'time_step': 0.004774923241415689, 'init_value': -6.273421764373779, 'ave_value': -6.642554955231446, 'soft_opc': nan} step=2408




2022-04-20 17:02.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.02 [info     ] FQE_20220420170146: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017096622045650037, 'time_algorithm_update': 0.005075253719507262, 'loss': 0.053412235926750096, 'time_step': 0.005324359550032505, 'init_value': -6.967676162719727, 'ave_value': -7.6351151215533415, 'soft_opc': nan} step=2752




2022-04-20 17:02.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.04 [info     ] FQE_20220420170146: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001736927864163421, 'time_algorithm_update': 0.00512709381968476, 'loss': 0.06314497755104026, 'time_step': 0.005379730185797048, 'init_value': -7.57858419418335, 'ave_value': -8.593069337860488, 'soft_opc': nan} step=3096




2022-04-20 17:02.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.06 [info     ] FQE_20220420170146: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016983373220576795, 'time_algorithm_update': 0.005028943682825843, 'loss': 0.07637490345663282, 'time_step': 0.005278355160424876, 'init_value': -8.411479949951172, 'ave_value': -9.835535753568685, 'soft_opc': nan} step=3440




2022-04-20 17:02.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.08 [info     ] FQE_20220420170146: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001712129559627799, 'time_algorithm_update': 0.005050922549048135, 'loss': 0.08454344102159836, 'time_step': 0.005299504413161167, 'init_value': -8.795193672180176, 'ave_value': -10.61330766113022, 'soft_opc': nan} step=3784




2022-04-20 17:02.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.10 [info     ] FQE_20220420170146: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001731362453726835, 'time_algorithm_update': 0.004596764265104782, 'loss': 0.09793523461341339, 'time_step': 0.004846474459004956, 'init_value': -9.532119750976562, 'ave_value': -11.793862166579997, 'soft_opc': nan} step=4128




2022-04-20 17:02.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.12 [info     ] FQE_20220420170146: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017289158909819847, 'time_algorithm_update': 0.00511808104293291, 'loss': 0.1117525491105436, 'time_step': 0.005366881919461627, 'init_value': -10.293584823608398, 'ave_value': -12.946639960541113, 'soft_opc': nan} step=4472




2022-04-20 17:02.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.14 [info     ] FQE_20220420170146: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016957798669504565, 'time_algorithm_update': 0.00498911322549332, 'loss': 0.12312172251471946, 'time_step': 0.005235078722931618, 'init_value': -11.100252151489258, 'ave_value': -14.106620298419987, 'soft_opc': nan} step=4816




2022-04-20 17:02.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.16 [info     ] FQE_20220420170146: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001712725606075553, 'time_algorithm_update': 0.005085230566734491, 'loss': 0.13408339984693327, 'time_step': 0.005333972531695699, 'init_value': -11.738985061645508, 'ave_value': -15.066102732816576, 'soft_opc': nan} step=5160




2022-04-20 17:02.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.17 [info     ] FQE_20220420170146: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001704762148302655, 'time_algorithm_update': 0.004625341226888257, 'loss': 0.14392573489309396, 'time_step': 0.0048717627691668135, 'init_value': -12.652114868164062, 'ave_value': -16.324063466981407, 'soft_opc': nan} step=5504




2022-04-20 17:02.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.19 [info     ] FQE_20220420170146: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016964036364888036, 'time_algorithm_update': 0.005013020925743635, 'loss': 0.1537207495637758, 'time_step': 0.005258104828901069, 'init_value': -13.24316692352295, 'ave_value': -17.177195084886627, 'soft_opc': nan} step=5848




2022-04-20 17:02.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.21 [info     ] FQE_20220420170146: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017269544823225155, 'time_algorithm_update': 0.005046299030614453, 'loss': 0.16443758740544664, 'time_step': 0.005296803491060124, 'init_value': -13.687744140625, 'ave_value': -17.87890522115201, 'soft_opc': nan} step=6192




2022-04-20 17:02.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.23 [info     ] FQE_20220420170146: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016933194426603095, 'time_algorithm_update': 0.005069589199021805, 'loss': 0.17763855006155926, 'time_step': 0.005317466896633769, 'init_value': -14.632684707641602, 'ave_value': -19.076557940179022, 'soft_opc': nan} step=6536




2022-04-20 17:02.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.25 [info     ] FQE_20220420170146: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017593974290892135, 'time_algorithm_update': 0.005074483710666036, 'loss': 0.18412130066128665, 'time_step': 0.005326149768607561, 'init_value': -15.186336517333984, 'ave_value': -19.881959139213368, 'soft_opc': nan} step=6880




2022-04-20 17:02.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.27 [info     ] FQE_20220420170146: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017060859258784803, 'time_algorithm_update': 0.0046010474826014315, 'loss': 0.19227281367124685, 'time_step': 0.004850208759307861, 'init_value': -15.926594734191895, 'ave_value': -20.961415098083986, 'soft_opc': nan} step=7224




2022-04-20 17:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.29 [info     ] FQE_20220420170146: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017323396926702454, 'time_algorithm_update': 0.005061350589574769, 'loss': 0.19755591431004538, 'time_step': 0.005314683498338212, 'init_value': -16.217689514160156, 'ave_value': -21.55605479972353, 'soft_opc': nan} step=7568




2022-04-20 17:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.31 [info     ] FQE_20220420170146: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017266633898712868, 'time_algorithm_update': 0.005049669465353322, 'loss': 0.2037679842283386, 'time_step': 0.005296876264172931, 'init_value': -17.131977081298828, 'ave_value': -22.695066173287394, 'soft_opc': nan} step=7912




2022-04-20 17:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.33 [info     ] FQE_20220420170146: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001740469488986703, 'time_algorithm_update': 0.005105302777401236, 'loss': 0.21462325928841047, 'time_step': 0.005357112995413847, 'init_value': -17.302146911621094, 'ave_value': -23.134241496179637, 'soft_opc': nan} step=8256




2022-04-20 17:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.35 [info     ] FQE_20220420170146: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017008462617563647, 'time_algorithm_update': 0.004780085280884144, 'loss': 0.2155923146949431, 'time_step': 0.005027094552683276, 'init_value': -17.904178619384766, 'ave_value': -24.010704486755873, 'soft_opc': nan} step=8600




2022-04-20 17:02.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.37 [info     ] FQE_20220420170146: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001700347246125687, 'time_algorithm_update': 0.005083015491796094, 'loss': 0.22041722311811565, 'time_step': 0.005331225173417912, 'init_value': -18.438682556152344, 'ave_value': -24.807160764533254, 'soft_opc': nan} step=8944




2022-04-20 17:02.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.39 [info     ] FQE_20220420170146: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017113533130911894, 'time_algorithm_update': 0.0050467093323552335, 'loss': 0.22998407961757378, 'time_step': 0.005295486644256947, 'init_value': -18.748554229736328, 'ave_value': -25.466335508710838, 'soft_opc': nan} step=9288




2022-04-20 17:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.41 [info     ] FQE_20220420170146: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017432348672733752, 'time_algorithm_update': 0.004988202521967334, 'loss': 0.23801552105240179, 'time_step': 0.005241553450739661, 'init_value': -19.350095748901367, 'ave_value': -26.267939348774334, 'soft_opc': nan} step=9632




2022-04-20 17:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.43 [info     ] FQE_20220420170146: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017258663510167323, 'time_algorithm_update': 0.005051292652307555, 'loss': 0.24320529022275708, 'time_step': 0.005299813525621281, 'init_value': -19.93876075744629, 'ave_value': -27.128687359078846, 'soft_opc': nan} step=9976




2022-04-20 17:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.45 [info     ] FQE_20220420170146: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016907897106436796, 'time_algorithm_update': 0.004635756792024125, 'loss': 0.24893116231921109, 'time_step': 0.004880309104919434, 'init_value': -20.355548858642578, 'ave_value': -27.88728985634951, 'soft_opc': nan} step=10320




2022-04-20 17:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.47 [info     ] FQE_20220420170146: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016822579295136208, 'time_algorithm_update': 0.005109839661176814, 'loss': 0.2554664125579388, 'time_step': 0.005357462306355321, 'init_value': -20.492366790771484, 'ave_value': -28.298333972444556, 'soft_opc': nan} step=10664




2022-04-20 17:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.49 [info     ] FQE_20220420170146: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017191019169119902, 'time_algorithm_update': 0.005083899165308753, 'loss': 0.2572285232383238, 'time_step': 0.00533255796099818, 'init_value': -21.031898498535156, 'ave_value': -29.050394538981706, 'soft_opc': nan} step=11008




2022-04-20 17:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.51 [info     ] FQE_20220420170146: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017230593880941702, 'time_algorithm_update': 0.005103364933368771, 'loss': 0.2741719069240918, 'time_step': 0.005350848270017047, 'init_value': -21.11933708190918, 'ave_value': -29.536544152141154, 'soft_opc': nan} step=11352




2022-04-20 17:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.53 [info     ] FQE_20220420170146: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001699425453363463, 'time_algorithm_update': 0.004920244216918945, 'loss': 0.2803168960056419, 'time_step': 0.0051651167315106055, 'init_value': -21.77496910095215, 'ave_value': -30.39724704126517, 'soft_opc': nan} step=11696




2022-04-20 17:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.54 [info     ] FQE_20220420170146: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001683387645455294, 'time_algorithm_update': 0.004889813966529314, 'loss': 0.28851557808094336, 'time_step': 0.005133780629135842, 'init_value': -21.77623748779297, 'ave_value': -30.644180943689367, 'soft_opc': nan} step=12040




2022-04-20 17:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.56 [info     ] FQE_20220420170146: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016803866208985795, 'time_algorithm_update': 0.005044901093771291, 'loss': 0.2888227124416897, 'time_step': 0.005290689163429793, 'init_value': -21.999217987060547, 'ave_value': -31.111130478078717, 'soft_opc': nan} step=12384




2022-04-20 17:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:02.58 [info     ] FQE_20220420170146: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016839698303577511, 'time_algorithm_update': 0.005068237005278121, 'loss': 0.29588569941670567, 'time_step': 0.005309253931045532, 'init_value': -22.331911087036133, 'ave_value': -31.77452386810973, 'soft_opc': nan} step=12728




2022-04-20 17:02.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.00 [info     ] FQE_20220420170146: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017359021098114724, 'time_algorithm_update': 0.005082694597022478, 'loss': 0.30185353390731695, 'time_step': 0.005334133325621139, 'init_value': -22.445022583007812, 'ave_value': -31.96874146056068, 'soft_opc': nan} step=13072




2022-04-20 17:03.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.02 [info     ] FQE_20220420170146: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017215969950653786, 'time_algorithm_update': 0.0046201486920201505, 'loss': 0.3092945970237515, 'time_step': 0.004867756782576095, 'init_value': -22.588274002075195, 'ave_value': -32.45029171438099, 'soft_opc': nan} step=13416




2022-04-20 17:03.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.04 [info     ] FQE_20220420170146: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017223593800566917, 'time_algorithm_update': 0.005001418119253114, 'loss': 0.31607447237014596, 'time_step': 0.005250401968179747, 'init_value': -23.122718811035156, 'ave_value': -32.91671136902259, 'soft_opc': nan} step=13760




2022-04-20 17:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.06 [info     ] FQE_20220420170146: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017390625421390978, 'time_algorithm_update': 0.005068255718364272, 'loss': 0.3242933880754335, 'time_step': 0.005319365928339404, 'init_value': -23.436450958251953, 'ave_value': -33.25234683007002, 'soft_opc': nan} step=14104




2022-04-20 17:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.08 [info     ] FQE_20220420170146: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017530419105707214, 'time_algorithm_update': 0.005025316809498986, 'loss': 0.33490059266494976, 'time_step': 0.005280002605083377, 'init_value': -23.52647590637207, 'ave_value': -33.56978173625093, 'soft_opc': nan} step=14448




2022-04-20 17:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.10 [info     ] FQE_20220420170146: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017261505126953125, 'time_algorithm_update': 0.0051142871379852295, 'loss': 0.33868289915411626, 'time_step': 0.0053670163764510045, 'init_value': -23.496601104736328, 'ave_value': -33.648642187773646, 'soft_opc': nan} step=14792




2022-04-20 17:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.12 [info     ] FQE_20220420170146: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001695426397545393, 'time_algorithm_update': 0.004652196584745895, 'loss': 0.3463151985562818, 'time_step': 0.004898181488347608, 'init_value': -23.855287551879883, 'ave_value': -34.12337545463899, 'soft_opc': nan} step=15136




2022-04-20 17:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.14 [info     ] FQE_20220420170146: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017265247744183208, 'time_algorithm_update': 0.0050232874792675635, 'loss': 0.35156472528158406, 'time_step': 0.0052724390528922855, 'init_value': -23.881561279296875, 'ave_value': -34.32629559456228, 'soft_opc': nan} step=15480




2022-04-20 17:03.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.16 [info     ] FQE_20220420170146: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017177573470182196, 'time_algorithm_update': 0.0050913088543470516, 'loss': 0.3621606337702508, 'time_step': 0.005340632311133451, 'init_value': -24.080629348754883, 'ave_value': -34.56898356206245, 'soft_opc': nan} step=15824




2022-04-20 17:03.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.18 [info     ] FQE_20220420170146: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001737711041472679, 'time_algorithm_update': 0.005111142646434696, 'loss': 0.37038980352389084, 'time_step': 0.005361814138501189, 'init_value': -24.714704513549805, 'ave_value': -35.16920719549463, 'soft_opc': nan} step=16168




2022-04-20 17:03.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.20 [info     ] FQE_20220420170146: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017141186913778616, 'time_algorithm_update': 0.004583092622978743, 'loss': 0.38797124200717137, 'time_step': 0.004831489435462064, 'init_value': -25.096725463867188, 'ave_value': -35.59622315614073, 'soft_opc': nan} step=16512




2022-04-20 17:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.22 [info     ] FQE_20220420170146: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001721513825793599, 'time_algorithm_update': 0.005071278921393461, 'loss': 0.3959947930470239, 'time_step': 0.0053204034650048545, 'init_value': -24.992313385009766, 'ave_value': -35.57313278030168, 'soft_opc': nan} step=16856




2022-04-20 17:03.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:03.24 [info     ] FQE_20220420170146: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001735569432724354, 'time_algorithm_update': 0.005113707032314566, 'loss': 0.3992199203010302, 'time_step': 0.005361962457035863, 'init_value': -25.523345947265625, 'ave_value': -36.193447559615514, 'soft_opc': nan} step=17200




2022-04-20 17:03.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170146/model_17200.pt
search iteration:  19
using hyper params:  [0.002755571730936746, 0.0031008072370513717, 4.557992189227535e-05, 7]
2022-04-20 17:03.24 [debug    ] RoundIterator is selected.
2022-04-20 17:03.24 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420170324
2022-04-20 17:03.24 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:03.24 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:03.24 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:03.24 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0027555717309

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.28 [info     ] TD3PlusBC_20220420170324: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0004002462353622704, 'time_algorithm_update': 0.008857879025197168, 'critic_loss': 13.384229824208377, 'actor_loss': 2.6600780988994397, 'time_step': 0.009339011203475862, 'td_error': 1.0499283115408524, 'init_value': -11.30113410949707, 'ave_value': -7.241209933402873} step=342
2022-04-20 17:03.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.31 [info     ] TD3PlusBC_20220420170324: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0004003598676090352, 'time_algorithm_update': 0.008498100509420473, 'critic_loss': 5.88025499958741, 'actor_loss': 2.575429410265203, 'time_step': 0.008978719599762855, 'td_error': 1.3125519139898114, 'init_value': -15.86998462677002, 'ave_value': -10.207164360783189} step=684
2022-04-20 17:03.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.35 [info     ] TD3PlusBC_20220420170324: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00040318324551944846, 'time_algorithm_update': 0.008832563433730812, 'critic_loss': 9.09091329574585, 'actor_loss': 2.565826201299478, 'time_step': 0.009315402187102022, 'td_error': 1.7232612435583585, 'init_value': -21.039077758789062, 'ave_value': -13.59681120953187} step=1026
2022-04-20 17:03.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.39 [info     ] TD3PlusBC_20220420170324: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00040218425773040596, 'time_algorithm_update': 0.008506349652831317, 'critic_loss': 12.85712506478293, 'actor_loss': 2.5618449484395702, 'time_step': 0.008985226614433423, 'td_error': 2.2033747118430727, 'init_value': -25.653881072998047, 'ave_value': -16.721227306473143} step=1368
2022-04-20 17:03.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.42 [info     ] TD3PlusBC_20220420170324: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0004058400092766299, 'time_algorithm_update': 0.008854608089603179, 'critic_loss': 17.280346045020032, 'actor_loss': 2.5603308747386375, 'time_step': 0.00933899238095646, 'td_error': 2.7830645030127474, 'init_value': -30.52390480041504, 'ave_value': -19.96094790414823} step=1710
2022-04-20 17:03.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.46 [info     ] TD3PlusBC_20220420170324: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00040324877577218394, 'time_algorithm_update': 0.009020829758448907, 'critic_loss': 22.1597827916954, 'actor_loss': 2.5594354428743062, 'time_step': 0.009497889998363472, 'td_error': 3.4208572966032684, 'init_value': -34.978153228759766, 'ave_value': -22.91603735362897} step=2052
2022-04-20 17:03.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.50 [info     ] TD3PlusBC_20220420170324: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00040285698851646735, 'time_algorithm_update': 0.008589676946227313, 'critic_loss': 27.44711891252395, 'actor_loss': 2.560091527581912, 'time_step': 0.009071770467256246, 'td_error': 4.0231573109421115, 'init_value': -39.1593017578125, 'ave_value': -25.922005887200786} step=2394
2022-04-20 17:03.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.54 [info     ] TD3PlusBC_20220420170324: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003975176671792192, 'time_algorithm_update': 0.008925218331186395, 'critic_loss': 32.83084549820214, 'actor_loss': 2.5588278826217206, 'time_step': 0.00939923071721841, 'td_error': 4.690337361803727, 'init_value': -43.038185119628906, 'ave_value': -28.650955102873613} step=2736
2022-04-20 17:03.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.57 [info     ] TD3PlusBC_20220420170324: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003971467938339501, 'time_algorithm_update': 0.00854949156443278, 'critic_loss': 38.6029564483821, 'actor_loss': 2.5593705344618414, 'time_step': 0.009019807765358373, 'td_error': 5.162277696032077, 'init_value': -46.617034912109375, 'ave_value': -31.058741745768987} step=3078
2022-04-20 17:03.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.01 [info     ] TD3PlusBC_20220420170324: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00039881572388766103, 'time_algorithm_update': 0.008871434724818893, 'critic_loss': 44.24140298296834, 'actor_loss': 2.5583594556440388, 'time_step': 0.009348458016825002, 'td_error': 5.657667465398645, 'init_value': -49.91059494018555, 'ave_value': -33.62980984770147} step=3420
2022-04-20 17:04.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.05 [info     ] TD3PlusBC_20220420170324: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00039757413473742747, 'time_algorithm_update': 0.008843156329372473, 'critic_loss': 50.27418506354616, 'actor_loss': 2.558272278099729, 'time_step': 0.009321028726142749, 'td_error': 6.0897283426918145, 'init_value': -53.19413375854492, 'ave_value': -35.83212718075116} step=3762
2022-04-20 17:04.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.08 [info     ] TD3PlusBC_20220420170324: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00040692195557711416, 'time_algorithm_update': 0.00857452272671705, 'critic_loss': 56.325428131728145, 'actor_loss': 2.558415022509837, 'time_step': 0.009056253739964892, 'td_error': 6.6712964397993515, 'init_value': -57.128944396972656, 'ave_value': -38.3749851753738} step=4104
2022-04-20 17:04.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.12 [info     ] TD3PlusBC_20220420170324: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00040118457281101516, 'time_algorithm_update': 0.008982949786716037, 'critic_loss': 62.5674971139919, 'actor_loss': 2.556957441463805, 'time_step': 0.009460784538447509, 'td_error': 7.137365845274054, 'init_value': -59.0885124206543, 'ave_value': -40.224654774023975} step=4446
2022-04-20 17:04.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.16 [info     ] TD3PlusBC_20220420170324: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00040145645364683273, 'time_algorithm_update': 0.008713325561835752, 'critic_loss': 68.34023058484172, 'actor_loss': 2.5574905384353728, 'time_step': 0.009191474022223936, 'td_error': 7.737220263848986, 'init_value': -62.32489776611328, 'ave_value': -42.298107293084485} step=4788
2022-04-20 17:04.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.19 [info     ] TD3PlusBC_20220420170324: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0004038343652647141, 'time_algorithm_update': 0.008959588251615825, 'critic_loss': 74.26382165206105, 'actor_loss': 2.556914764538146, 'time_step': 0.009437771568521422, 'td_error': 8.223389235998214, 'init_value': -66.19068145751953, 'ave_value': -44.528434623958844} step=5130
2022-04-20 17:04.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.23 [info     ] TD3PlusBC_20220420170324: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00040717710528457375, 'time_algorithm_update': 0.008882606238649603, 'critic_loss': 79.84501277633578, 'actor_loss': 2.557869780133342, 'time_step': 0.00936742693360089, 'td_error': 8.782284587691905, 'init_value': -67.53588104248047, 'ave_value': -46.078406973935934} step=5472
2022-04-20 17:04.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.27 [info     ] TD3PlusBC_20220420170324: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00039711263444688584, 'time_algorithm_update': 0.008452842807212072, 'critic_loss': 85.90046153152198, 'actor_loss': 2.5570289489121465, 'time_step': 0.008926283546358521, 'td_error': 9.249057678216067, 'init_value': -70.49687194824219, 'ave_value': -47.777397035637634} step=5814
2022-04-20 17:04.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.30 [info     ] TD3PlusBC_20220420170324: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0004041187944468002, 'time_algorithm_update': 0.008843605281316747, 'critic_loss': 91.20779012936597, 'actor_loss': 2.5581824584314, 'time_step': 0.00932665387092278, 'td_error': 9.681226836350154, 'init_value': -72.30848693847656, 'ave_value': -49.39147334878174} step=6156
2022-04-20 17:04.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.34 [info     ] TD3PlusBC_20220420170324: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0004027830926995528, 'time_algorithm_update': 0.00892401717559636, 'critic_loss': 96.82881237052338, 'actor_loss': 2.557421489068639, 'time_step': 0.009405269260294953, 'td_error': 10.372596750317404, 'init_value': -74.67662811279297, 'ave_value': -50.89802772231014} step=6498
2022-04-20 17:04.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.38 [info     ] TD3PlusBC_20220420170324: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00040595852143583244, 'time_algorithm_update': 0.008863950333400079, 'critic_loss': 102.22015757867467, 'actor_loss': 2.55810654930204, 'time_step': 0.009340196325067888, 'td_error': 10.470045180506162, 'init_value': -76.78793334960938, 'ave_value': -52.667078130076064} step=6840
2022-04-20 17:04.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.41 [info     ] TD3PlusBC_20220420170324: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00040150385851051376, 'time_algorithm_update': 0.008849088908636083, 'critic_loss': 107.12014201649448, 'actor_loss': 2.558501858460276, 'time_step': 0.009313512963858264, 'td_error': 10.960648452696477, 'init_value': -78.38239288330078, 'ave_value': -53.41246717851752} step=7182
2022-04-20 17:04.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.45 [info     ] TD3PlusBC_20220420170324: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00040171230048464057, 'time_algorithm_update': 0.008537629194426955, 'critic_loss': 112.39288235268397, 'actor_loss': 2.5583732058430275, 'time_step': 0.00900303202065808, 'td_error': 11.76964659276395, 'init_value': -80.28978729248047, 'ave_value': -55.24620332565142} step=7524
2022-04-20 17:04.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.49 [info     ] TD3PlusBC_20220420170324: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0004027761213960703, 'time_algorithm_update': 0.00896504817650332, 'critic_loss': 117.20426234864352, 'actor_loss': 2.559248515736987, 'time_step': 0.009433373373154311, 'td_error': 12.397060739395705, 'init_value': -81.80888366699219, 'ave_value': -56.536381113813135} step=7866
2022-04-20 17:04.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.52 [info     ] TD3PlusBC_20220420170324: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00040453358700400904, 'time_algorithm_update': 0.008978040594803659, 'critic_loss': 122.0672543174342, 'actor_loss': 2.5592242569951287, 'time_step': 0.009443992062618858, 'td_error': 11.903862813797035, 'init_value': -79.14698791503906, 'ave_value': -56.87806049072393} step=8208
2022-04-20 17:04.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.56 [info     ] TD3PlusBC_20220420170324: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00040299850597716214, 'time_algorithm_update': 0.008625492714999015, 'critic_loss': 126.39960488659597, 'actor_loss': 2.5593102842743636, 'time_step': 0.009091993521528634, 'td_error': 12.551073502736672, 'init_value': -81.07041931152344, 'ave_value': -57.976594578901825} step=8550
2022-04-20 17:04.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.00 [info     ] TD3PlusBC_20220420170324: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00040422615252043074, 'time_algorithm_update': 0.008861757161324485, 'critic_loss': 130.8442430105823, 'actor_loss': 2.5599789703101443, 'time_step': 0.00933044626001726, 'td_error': 13.124678190780555, 'init_value': -82.82771301269531, 'ave_value': -59.100973040979554} step=8892
2022-04-20 17:05.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.03 [info     ] TD3PlusBC_20220420170324: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0004021521897343864, 'time_algorithm_update': 0.008438787962261, 'critic_loss': 135.2792239718967, 'actor_loss': 2.5607696597339116, 'time_step': 0.008903400939807557, 'td_error': 13.121133469967958, 'init_value': -81.43603515625, 'ave_value': -59.47157138296445} step=9234
2022-04-20 17:05.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.07 [info     ] TD3PlusBC_20220420170324: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003991057301125331, 'time_algorithm_update': 0.00889086235336393, 'critic_loss': 139.44849520677712, 'actor_loss': 2.561323830955907, 'time_step': 0.009357997548510457, 'td_error': 14.297783701224027, 'init_value': -83.802734375, 'ave_value': -60.73110291848815} step=9576
2022-04-20 17:05.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.11 [info     ] TD3PlusBC_20220420170324: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00039664973989564774, 'time_algorithm_update': 0.008760600062141641, 'critic_loss': 143.8326070462054, 'actor_loss': 2.561186743061445, 'time_step': 0.009225067339445414, 'td_error': 13.96241873181085, 'init_value': -84.28416442871094, 'ave_value': -61.64430637616983} step=9918
2022-04-20 17:05.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.14 [info     ] TD3PlusBC_20220420170324: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00040423939799704746, 'time_algorithm_update': 0.00848451204467238, 'critic_loss': 147.52606410868682, 'actor_loss': 2.5621319723408122, 'time_step': 0.008954856827942252, 'td_error': 14.486715179941903, 'init_value': -84.75040435791016, 'ave_value': -62.48769851114408} step=10260
2022-04-20 17:05.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.18 [info     ] TD3PlusBC_20220420170324: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003995372537981, 'time_algorithm_update': 0.008978555774131017, 'critic_loss': 151.3113935593276, 'actor_loss': 2.5626062245396843, 'time_step': 0.009444378272831788, 'td_error': 14.927185281349326, 'init_value': -85.23085021972656, 'ave_value': -63.13278902486545} step=10602
2022-04-20 17:05.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.22 [info     ] TD3PlusBC_20220420170324: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00040044282612047697, 'time_algorithm_update': 0.00849889872366922, 'critic_loss': 154.80998999612373, 'actor_loss': 2.562440144388299, 'time_step': 0.008962776228698373, 'td_error': 15.111685356363873, 'init_value': -83.8200454711914, 'ave_value': -63.140540381127984} step=10944
2022-04-20 17:05.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.26 [info     ] TD3PlusBC_20220420170324: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00040269107149358384, 'time_algorithm_update': 0.008885744022347077, 'critic_loss': 158.6725777296992, 'actor_loss': 2.563669563036913, 'time_step': 0.009358989564996017, 'td_error': 15.325278080729978, 'init_value': -85.701904296875, 'ave_value': -64.10082906895208} step=11286
2022-04-20 17:05.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.29 [info     ] TD3PlusBC_20220420170324: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00040063035418415625, 'time_algorithm_update': 0.008930659433554488, 'critic_loss': 161.6930673164234, 'actor_loss': 2.5638723680150437, 'time_step': 0.00939470076421548, 'td_error': 15.382364979685596, 'init_value': -85.2587661743164, 'ave_value': -64.43652938181752} step=11628
2022-04-20 17:05.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.33 [info     ] TD3PlusBC_20220420170324: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00040051114489460547, 'time_algorithm_update': 0.008531374540942454, 'critic_loss': 164.72460315102026, 'actor_loss': 2.564070927469354, 'time_step': 0.008995404020387527, 'td_error': 15.711451016403464, 'init_value': -85.91299438476562, 'ave_value': -65.43831004550563} step=11970
2022-04-20 17:05.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.37 [info     ] TD3PlusBC_20220420170324: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0004000363991274471, 'time_algorithm_update': 0.008840813971402352, 'critic_loss': 167.3632809376856, 'actor_loss': 2.5648805718672905, 'time_step': 0.009307527402688188, 'td_error': 15.934785766083387, 'init_value': -86.33576965332031, 'ave_value': -65.92739322287912} step=12312
2022-04-20 17:05.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.40 [info     ] TD3PlusBC_20220420170324: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00040186915481299685, 'time_algorithm_update': 0.008453311975936444, 'critic_loss': 170.19217445418153, 'actor_loss': 2.5650954399889674, 'time_step': 0.008925713990864, 'td_error': 16.41893076963833, 'init_value': -84.90180969238281, 'ave_value': -66.24733721530568} step=12654
2022-04-20 17:05.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.44 [info     ] TD3PlusBC_20220420170324: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0004053345897741485, 'time_algorithm_update': 0.008820809816059313, 'critic_loss': 173.14130711694906, 'actor_loss': 2.5646115130151226, 'time_step': 0.009296739310549017, 'td_error': 16.825961095256645, 'init_value': -85.4509048461914, 'ave_value': -66.99739103800692} step=12996
2022-04-20 17:05.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.48 [info     ] TD3PlusBC_20220420170324: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00040285419999507434, 'time_algorithm_update': 0.008873068101224844, 'critic_loss': 175.4742325007567, 'actor_loss': 2.565029991997613, 'time_step': 0.00934255820268776, 'td_error': 16.52840063980877, 'init_value': -85.00816345214844, 'ave_value': -67.05515765505902} step=13338
2022-04-20 17:05.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.51 [info     ] TD3PlusBC_20220420170324: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0004071066951194005, 'time_algorithm_update': 0.008527087886431063, 'critic_loss': 177.521317398339, 'actor_loss': 2.5650678210788302, 'time_step': 0.009006753999587388, 'td_error': 16.898911381659758, 'init_value': -84.21922302246094, 'ave_value': -67.10413436441632} step=13680
2022-04-20 17:05.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.55 [info     ] TD3PlusBC_20220420170324: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0004059822238676729, 'time_algorithm_update': 0.008917466938844201, 'critic_loss': 179.45922702097752, 'actor_loss': 2.5657709286226864, 'time_step': 0.009387829150372779, 'td_error': 16.851955854784542, 'init_value': -83.59195709228516, 'ave_value': -67.39232345029107} step=14022
2022-04-20 17:05.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.58 [info     ] TD3PlusBC_20220420170324: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00039896978969462436, 'time_algorithm_update': 0.008621138438843844, 'critic_loss': 180.89759342572842, 'actor_loss': 2.56525108688756, 'time_step': 0.009087542344255057, 'td_error': 16.667880925932334, 'init_value': -81.6639633178711, 'ave_value': -67.66987006760328} step=14364
2022-04-20 17:05.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.02 [info     ] TD3PlusBC_20220420170324: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00040142996269359923, 'time_algorithm_update': 0.008859401557877747, 'critic_loss': 183.2637663501048, 'actor_loss': 2.565148689593488, 'time_step': 0.009330718837983427, 'td_error': 18.418287390274873, 'init_value': -84.19962310791016, 'ave_value': -68.29900656967483} step=14706
2022-04-20 17:06.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.06 [info     ] TD3PlusBC_20220420170324: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0004013100562737002, 'time_algorithm_update': 0.008889705814116182, 'critic_loss': 183.9495138424879, 'actor_loss': 2.5659255326142785, 'time_step': 0.00935777725532041, 'td_error': 17.64819325331848, 'init_value': -84.06849670410156, 'ave_value': -68.48794882177249} step=15048
2022-04-20 17:06.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.09 [info     ] TD3PlusBC_20220420170324: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00040046652855231746, 'time_algorithm_update': 0.007355680242616531, 'critic_loss': 185.0561643232379, 'actor_loss': 2.5660627766659387, 'time_step': 0.007819230793512355, 'td_error': 18.775508158036292, 'init_value': -84.87054443359375, 'ave_value': -68.99875554107616} step=15390
2022-04-20 17:06.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.12 [info     ] TD3PlusBC_20220420170324: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003966776251095777, 'time_algorithm_update': 0.0067748626073201495, 'critic_loss': 186.12513052510937, 'actor_loss': 2.5659777755625766, 'time_step': 0.007237016806128429, 'td_error': 18.6471327578081, 'init_value': -85.12043762207031, 'ave_value': -69.09877788374804} step=15732
2022-04-20 17:06.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.15 [info     ] TD3PlusBC_20220420170324: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00039397554787976003, 'time_algorithm_update': 0.00689269367017244, 'critic_loss': 187.04806900024414, 'actor_loss': 2.5665061529616864, 'time_step': 0.007350400874489232, 'td_error': 18.407634573792397, 'init_value': -84.27545166015625, 'ave_value': -69.20771108571826} step=16074
2022-04-20 17:06.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.18 [info     ] TD3PlusBC_20220420170324: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00039578320687277274, 'time_algorithm_update': 0.006799469217222336, 'critic_loss': 187.7934515434399, 'actor_loss': 2.5661112300136635, 'time_step': 0.007263428286502236, 'td_error': 19.108983446988667, 'init_value': -85.14363098144531, 'ave_value': -69.7378148251636} step=16416
2022-04-20 17:06.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.21 [info     ] TD3PlusBC_20220420170324: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003965061310439082, 'time_algorithm_update': 0.00678114932880067, 'critic_loss': 188.6469867996305, 'actor_loss': 2.567192930924265, 'time_step': 0.007249629288388972, 'td_error': 18.3958329405099, 'init_value': -84.94291687011719, 'ave_value': -70.0197933579185} step=16758
2022-04-20 17:06.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.24 [info     ] TD3PlusBC_20220420170324: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00039487484602900274, 'time_algorithm_update': 0.006722032675269054, 'critic_loss': 189.14743811624092, 'actor_loss': 2.56694391596387, 'time_step': 0.007184797560262401, 'td_error': 19.10575959365338, 'init_value': -83.0577163696289, 'ave_value': -69.6110214457384} step=17100
2022-04-20 17:06.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170324/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.8

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:06.25 [info     ] FQE_20220420170624: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001684039472097374, 'time_algorithm_update': 0.0036035187273140415, 'loss': 0.008114425019439623, 'time_step': 0.003845158829746476, 'init_value': -0.07272744178771973, 'ave_value': -0.04067688485615895, 'soft_opc': nan} step=166




2022-04-20 17:06.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.25 [info     ] FQE_20220420170624: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016172822699489365, 'time_algorithm_update': 0.003489997013505683, 'loss': 0.006150583172755608, 'time_step': 0.00372035101235631, 'init_value': -0.2348528802394867, 'ave_value': -0.15290604769566876, 'soft_opc': nan} step=332




2022-04-20 17:06.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.26 [info     ] FQE_20220420170624: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016511779233633754, 'time_algorithm_update': 0.003551272024591285, 'loss': 0.0053347118538299414, 'time_step': 0.003787853631628565, 'init_value': -0.31861749291419983, 'ave_value': -0.1925700475923247, 'soft_opc': nan} step=498




2022-04-20 17:06.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.27 [info     ] FQE_20220420170624: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016775619552796147, 'time_algorithm_update': 0.003535942858960255, 'loss': 0.005129624588740428, 'time_step': 0.0037751657417021602, 'init_value': -0.35741284489631653, 'ave_value': -0.18890849185125852, 'soft_opc': nan} step=664




2022-04-20 17:06.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.28 [info     ] FQE_20220420170624: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016152427857180676, 'time_algorithm_update': 0.0035981083490762367, 'loss': 0.004689447988520365, 'time_step': 0.003829833972884948, 'init_value': -0.4217997193336487, 'ave_value': -0.22669011081342358, 'soft_opc': nan} step=830




2022-04-20 17:06.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.28 [info     ] FQE_20220420170624: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016292750117290452, 'time_algorithm_update': 0.0035560245973518096, 'loss': 0.0045426518372989385, 'time_step': 0.0037915821535041533, 'init_value': -0.4401460289955139, 'ave_value': -0.23119498865913835, 'soft_opc': nan} step=996




2022-04-20 17:06.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.29 [info     ] FQE_20220420170624: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015856846269354764, 'time_algorithm_update': 0.0034562248781502963, 'loss': 0.004276076951681311, 'time_step': 0.003680779273251453, 'init_value': -0.48078545928001404, 'ave_value': -0.2535854549399946, 'soft_opc': nan} step=1162




2022-04-20 17:06.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.30 [info     ] FQE_20220420170624: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016449158450207078, 'time_algorithm_update': 0.0034520338816815114, 'loss': 0.004158523930966046, 'time_step': 0.0036871461983186654, 'init_value': -0.5689182281494141, 'ave_value': -0.329932744451379, 'soft_opc': nan} step=1328




2022-04-20 17:06.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.30 [info     ] FQE_20220420170624: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016581150422613304, 'time_algorithm_update': 0.0034677494003112056, 'loss': 0.003930159944042294, 'time_step': 0.0037040810987173794, 'init_value': -0.5925936698913574, 'ave_value': -0.34312248641283144, 'soft_opc': nan} step=1494




2022-04-20 17:06.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.31 [info     ] FQE_20220420170624: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016266179372029132, 'time_algorithm_update': 0.0034475513251431018, 'loss': 0.00390103666381125, 'time_step': 0.00368009130638766, 'init_value': -0.661686897277832, 'ave_value': -0.40298061038106303, 'soft_opc': nan} step=1660




2022-04-20 17:06.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.32 [info     ] FQE_20220420170624: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016557739441653332, 'time_algorithm_update': 0.003518923219428005, 'loss': 0.003879264432583453, 'time_step': 0.0037548642560660123, 'init_value': -0.7356977462768555, 'ave_value': -0.46355667162376146, 'soft_opc': nan} step=1826




2022-04-20 17:06.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.32 [info     ] FQE_20220420170624: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016184025500194137, 'time_algorithm_update': 0.0034829177052141673, 'loss': 0.0038315522041261554, 'time_step': 0.0037146002413278602, 'init_value': -0.7492504119873047, 'ave_value': -0.4641351716517395, 'soft_opc': nan} step=1992




2022-04-20 17:06.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.33 [info     ] FQE_20220420170624: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016531168696392015, 'time_algorithm_update': 0.003572538674595844, 'loss': 0.0040301794750077355, 'time_step': 0.0038109285285673946, 'init_value': -0.8454708456993103, 'ave_value': -0.5540073474869132, 'soft_opc': nan} step=2158




2022-04-20 17:06.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.34 [info     ] FQE_20220420170624: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016408368765589702, 'time_algorithm_update': 0.0034700919346637034, 'loss': 0.003944183073528336, 'time_step': 0.0037037593772612423, 'init_value': -0.8997776508331299, 'ave_value': -0.5879912893782865, 'soft_opc': nan} step=2324




2022-04-20 17:06.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.35 [info     ] FQE_20220420170624: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016232427344264755, 'time_algorithm_update': 0.0034987610506724163, 'loss': 0.004030556137482804, 'time_step': 0.0037338317158710525, 'init_value': -0.9797768592834473, 'ave_value': -0.6496277952046545, 'soft_opc': nan} step=2490




2022-04-20 17:06.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.35 [info     ] FQE_20220420170624: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000163377049457596, 'time_algorithm_update': 0.003549233976616917, 'loss': 0.004413178286666657, 'time_step': 0.003783721521676305, 'init_value': -1.054399013519287, 'ave_value': -0.7087439571280737, 'soft_opc': nan} step=2656




2022-04-20 17:06.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.36 [info     ] FQE_20220420170624: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001666057540709714, 'time_algorithm_update': 0.0035342753651630447, 'loss': 0.004607760012205915, 'time_step': 0.003771777612617217, 'init_value': -1.1715177297592163, 'ave_value': -0.8022546087030892, 'soft_opc': nan} step=2822




2022-04-20 17:06.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.37 [info     ] FQE_20220420170624: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016453610845358976, 'time_algorithm_update': 0.003506617373730763, 'loss': 0.004859522090949881, 'time_step': 0.0037411882216671862, 'init_value': -1.2165939807891846, 'ave_value': -0.8272845177432975, 'soft_opc': nan} step=2988




2022-04-20 17:06.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.37 [info     ] FQE_20220420170624: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016483915857521883, 'time_algorithm_update': 0.0034444375210497752, 'loss': 0.005509907370629574, 'time_step': 0.003684859677969691, 'init_value': -1.3042665719985962, 'ave_value': -0.8829089207658628, 'soft_opc': nan} step=3154




2022-04-20 17:06.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.38 [info     ] FQE_20220420170624: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001619666455739952, 'time_algorithm_update': 0.0034359549901571617, 'loss': 0.0057385247179688546, 'time_step': 0.0036641014627663486, 'init_value': -1.4421882629394531, 'ave_value': -0.9809643275935103, 'soft_opc': nan} step=3320




2022-04-20 17:06.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.39 [info     ] FQE_20220420170624: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016580001417412814, 'time_algorithm_update': 0.0035536159952002837, 'loss': 0.00618029871523515, 'time_step': 0.0037898959883724353, 'init_value': -1.4676225185394287, 'ave_value': -0.987544249424392, 'soft_opc': nan} step=3486




2022-04-20 17:06.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.39 [info     ] FQE_20220420170624: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016631993902734964, 'time_algorithm_update': 0.003654702600226345, 'loss': 0.006789031644009547, 'time_step': 0.003892889942031309, 'init_value': -1.571683645248413, 'ave_value': -1.0673103991258253, 'soft_opc': nan} step=3652




2022-04-20 17:06.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.40 [info     ] FQE_20220420170624: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016632424779685148, 'time_algorithm_update': 0.0036040989749402887, 'loss': 0.0071436287800175505, 'time_step': 0.0038455566727971457, 'init_value': -1.747572422027588, 'ave_value': -1.2262872931790783, 'soft_opc': nan} step=3818




2022-04-20 17:06.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.41 [info     ] FQE_20220420170624: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001654653664094856, 'time_algorithm_update': 0.0034894799611654625, 'loss': 0.007832566823905715, 'time_step': 0.0037248220788427145, 'init_value': -1.7789084911346436, 'ave_value': -1.2154918802885322, 'soft_opc': nan} step=3984




2022-04-20 17:06.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.41 [info     ] FQE_20220420170624: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00017124486256794757, 'time_algorithm_update': 0.0035851676780057243, 'loss': 0.008092039285777473, 'time_step': 0.003828713692814471, 'init_value': -1.8906193971633911, 'ave_value': -1.303932068134482, 'soft_opc': nan} step=4150




2022-04-20 17:06.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.42 [info     ] FQE_20220420170624: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016599103628870952, 'time_algorithm_update': 0.003472487610506724, 'loss': 0.00895385663530015, 'time_step': 0.003711892897824207, 'init_value': -1.9759031534194946, 'ave_value': -1.3446897163114568, 'soft_opc': nan} step=4316




2022-04-20 17:06.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.43 [info     ] FQE_20220420170624: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001663903155958796, 'time_algorithm_update': 0.003451294209583696, 'loss': 0.00864555936023391, 'time_step': 0.003693169858082231, 'init_value': -2.046928882598877, 'ave_value': -1.3804214251538118, 'soft_opc': nan} step=4482




2022-04-20 17:06.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.44 [info     ] FQE_20220420170624: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016232714595564878, 'time_algorithm_update': 0.00356623638107116, 'loss': 0.009962206021938697, 'time_step': 0.003800074738192271, 'init_value': -2.217815399169922, 'ave_value': -1.5119403598679078, 'soft_opc': nan} step=4648




2022-04-20 17:06.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.44 [info     ] FQE_20220420170624: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016611742686076337, 'time_algorithm_update': 0.003320859139224133, 'loss': 0.010097636374874961, 'time_step': 0.0035583340978047936, 'init_value': -2.2832958698272705, 'ave_value': -1.5340946153477506, 'soft_opc': nan} step=4814




2022-04-20 17:06.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.45 [info     ] FQE_20220420170624: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016346466110413334, 'time_algorithm_update': 0.0028474905404699854, 'loss': 0.011217810817382646, 'time_step': 0.0030815012483711704, 'init_value': -2.444363832473755, 'ave_value': -1.6589405835815916, 'soft_opc': nan} step=4980




2022-04-20 17:06.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.45 [info     ] FQE_20220420170624: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016166934047836856, 'time_algorithm_update': 0.003577578498656491, 'loss': 0.01164077125077349, 'time_step': 0.003808211131268237, 'init_value': -2.517554759979248, 'ave_value': -1.6863130858151225, 'soft_opc': nan} step=5146




2022-04-20 17:06.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.46 [info     ] FQE_20220420170624: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016618780342929335, 'time_algorithm_update': 0.003513547311346215, 'loss': 0.012096019010816937, 'time_step': 0.003752251705491399, 'init_value': -2.6193575859069824, 'ave_value': -1.7617683112084328, 'soft_opc': nan} step=5312




2022-04-20 17:06.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.47 [info     ] FQE_20220420170624: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001648923000657415, 'time_algorithm_update': 0.003579873636544469, 'loss': 0.012867480289213449, 'time_step': 0.0038143683628863597, 'init_value': -2.7525761127471924, 'ave_value': -1.8697417071005245, 'soft_opc': nan} step=5478




2022-04-20 17:06.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.48 [info     ] FQE_20220420170624: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016870986984436772, 'time_algorithm_update': 0.003540491483297693, 'loss': 0.013993159242266094, 'time_step': 0.0037828267338764236, 'init_value': -2.847992181777954, 'ave_value': -1.919229609485682, 'soft_opc': nan} step=5644




2022-04-20 17:06.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.48 [info     ] FQE_20220420170624: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016478888959769742, 'time_algorithm_update': 0.0034879517842488117, 'loss': 0.014753260966025698, 'time_step': 0.0037245563713901014, 'init_value': -2.9111945629119873, 'ave_value': -1.9165330195346395, 'soft_opc': nan} step=5810




2022-04-20 17:06.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.49 [info     ] FQE_20220420170624: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016367435455322266, 'time_algorithm_update': 0.003463189285921763, 'loss': 0.015243929018356264, 'time_step': 0.0036979051957647486, 'init_value': -3.015050172805786, 'ave_value': -1.9961450905547486, 'soft_opc': nan} step=5976




2022-04-20 17:06.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.50 [info     ] FQE_20220420170624: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001625713095607528, 'time_algorithm_update': 0.0035151286297533885, 'loss': 0.016302307984926344, 'time_step': 0.0037481985896466725, 'init_value': -3.1772866249084473, 'ave_value': -2.1261332370005213, 'soft_opc': nan} step=6142




2022-04-20 17:06.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.50 [info     ] FQE_20220420170624: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001690976590995329, 'time_algorithm_update': 0.0034853406699306994, 'loss': 0.017496404990537024, 'time_step': 0.00372765150414892, 'init_value': -3.213078498840332, 'ave_value': -2.0980685284143097, 'soft_opc': nan} step=6308




2022-04-20 17:06.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.51 [info     ] FQE_20220420170624: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001663141940013472, 'time_algorithm_update': 0.0035813558532531, 'loss': 0.018418793159772653, 'time_step': 0.0038211546748517507, 'init_value': -3.313040256500244, 'ave_value': -2.1670723942486014, 'soft_opc': nan} step=6474




2022-04-20 17:06.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.52 [info     ] FQE_20220420170624: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016314150339149567, 'time_algorithm_update': 0.0034618794199932053, 'loss': 0.018644648939315004, 'time_step': 0.0036959001816898943, 'init_value': -3.484774589538574, 'ave_value': -2.268824994805697, 'soft_opc': nan} step=6640




2022-04-20 17:06.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.52 [info     ] FQE_20220420170624: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016218352030558758, 'time_algorithm_update': 0.0034947840564222223, 'loss': 0.018958454118502967, 'time_step': 0.0037267825689660497, 'init_value': -3.556857109069824, 'ave_value': -2.317953555017441, 'soft_opc': nan} step=6806




2022-04-20 17:06.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.53 [info     ] FQE_20220420170624: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016579857791762753, 'time_algorithm_update': 0.0034599577087953866, 'loss': 0.019149184111977018, 'time_step': 0.0036963296223835773, 'init_value': -3.629420757293701, 'ave_value': -2.3547341498571472, 'soft_opc': nan} step=6972




2022-04-20 17:06.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.54 [info     ] FQE_20220420170624: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016034080321530262, 'time_algorithm_update': 0.0035713192928268247, 'loss': 0.020948990374377155, 'time_step': 0.0038039023617664016, 'init_value': -3.7137701511383057, 'ave_value': -2.381029775591047, 'soft_opc': nan} step=7138




2022-04-20 17:06.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.55 [info     ] FQE_20220420170624: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016248226165771484, 'time_algorithm_update': 0.003458418041826731, 'loss': 0.021417343110289335, 'time_step': 0.0036918183407151557, 'init_value': -3.814277410507202, 'ave_value': -2.45770292851302, 'soft_opc': nan} step=7304




2022-04-20 17:06.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.55 [info     ] FQE_20220420170624: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016623519989381353, 'time_algorithm_update': 0.0035408017147018247, 'loss': 0.022332918018815165, 'time_step': 0.003776994096227439, 'init_value': -3.858668327331543, 'ave_value': -2.4508166124691835, 'soft_opc': nan} step=7470




2022-04-20 17:06.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.56 [info     ] FQE_20220420170624: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001614065055387566, 'time_algorithm_update': 0.0034278975911887295, 'loss': 0.023069817166079778, 'time_step': 0.003663374716977039, 'init_value': -3.9594485759735107, 'ave_value': -2.5187952114520846, 'soft_opc': nan} step=7636




2022-04-20 17:06.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.57 [info     ] FQE_20220420170624: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016284563455236964, 'time_algorithm_update': 0.003593607121203319, 'loss': 0.02408655967804637, 'time_step': 0.003827402390629412, 'init_value': -4.062147617340088, 'ave_value': -2.584134866297245, 'soft_opc': nan} step=7802




2022-04-20 17:06.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.57 [info     ] FQE_20220420170624: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001642861998224833, 'time_algorithm_update': 0.003503268023571336, 'loss': 0.024897583600425398, 'time_step': 0.00374206577438906, 'init_value': -4.120122909545898, 'ave_value': -2.5933285672385415, 'soft_opc': nan} step=7968




2022-04-20 17:06.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.58 [info     ] FQE_20220420170624: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016326789396354952, 'time_algorithm_update': 0.003577475088188447, 'loss': 0.02565103516204238, 'time_step': 0.003817668880324766, 'init_value': -4.214296817779541, 'ave_value': -2.676011894415091, 'soft_opc': nan} step=8134




2022-04-20 17:06.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.59 [info     ] FQE_20220420170624: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016287579593888247, 'time_algorithm_update': 0.0034267040620367213, 'loss': 0.02627130678619247, 'time_step': 0.003660296819296228, 'init_value': -4.266288757324219, 'ave_value': -2.6419732345184226, 'soft_opc': nan} step=8300




2022-04-20 17:06.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170624/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:06.59 [info     ] Directory is created at d3rlpy_logs/FQE_20220420170659
2022-04-20 17:06.59 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:06.59 [debug    ] Building models...
2022-04-20 17:06.59 [debug    ] Models have been built.
2022-04-20 17:06.59 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420170659/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:07.01 [info     ] FQE_20220420170659: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00017287697590572733, 'time_algorithm_update': 0.003536219664022956, 'loss': 0.02768643770734189, 'time_step': 0.003784847259521484, 'init_value': -1.4863234758377075, 'ave_value': -1.4993087815041708, 'soft_opc': nan} step=355




2022-04-20 17:07.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.02 [info     ] FQE_20220420170659: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.0001666613028083049, 'time_algorithm_update': 0.0035339637541435135, 'loss': 0.024584309536386545, 'time_step': 0.0037725052363435986, 'init_value': -2.244828701019287, 'ave_value': -2.293945618040927, 'soft_opc': nan} step=710




2022-04-20 17:07.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.03 [info     ] FQE_20220420170659: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016454710087306063, 'time_algorithm_update': 0.0034874425807469326, 'loss': 0.027546436670170704, 'time_step': 0.003726193602655975, 'init_value': -2.6577165126800537, 'ave_value': -2.7714415578615097, 'soft_opc': nan} step=1065




2022-04-20 17:07.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.05 [info     ] FQE_20220420170659: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016666734722298635, 'time_algorithm_update': 0.0034903291245581398, 'loss': 0.032727535024390256, 'time_step': 0.0037330587145308373, 'init_value': -3.2340991497039795, 'ave_value': -3.387456835700892, 'soft_opc': nan} step=1420




2022-04-20 17:07.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.06 [info     ] FQE_20220420170659: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016631206996004346, 'time_algorithm_update': 0.003574946900488625, 'loss': 0.038958510738128505, 'time_step': 0.00381581145273128, 'init_value': -3.7866320610046387, 'ave_value': -3.9706062306308376, 'soft_opc': nan} step=1775




2022-04-20 17:07.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.08 [info     ] FQE_20220420170659: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.0001688063984185877, 'time_algorithm_update': 0.003565321505909235, 'loss': 0.05286079331917662, 'time_step': 0.003808100122801015, 'init_value': -4.462170124053955, 'ave_value': -4.663574533622851, 'soft_opc': nan} step=2130




2022-04-20 17:07.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.09 [info     ] FQE_20220420170659: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017033160572320642, 'time_algorithm_update': 0.0035192207551338305, 'loss': 0.0644216782295368, 'time_step': 0.0037641531984571, 'init_value': -5.166533946990967, 'ave_value': -5.347704348531929, 'soft_opc': nan} step=2485




2022-04-20 17:07.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.11 [info     ] FQE_20220420170659: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016797226919254787, 'time_algorithm_update': 0.003462510713389222, 'loss': 0.08058176542776571, 'time_step': 0.003703649279097436, 'init_value': -5.8292460441589355, 'ave_value': -5.959716806736232, 'soft_opc': nan} step=2840




2022-04-20 17:07.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.12 [info     ] FQE_20220420170659: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001672274629834672, 'time_algorithm_update': 0.0036471830287449795, 'loss': 0.09607464073514435, 'time_step': 0.003890808535293794, 'init_value': -6.535758018493652, 'ave_value': -6.688400325501287, 'soft_opc': nan} step=3195




2022-04-20 17:07.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.14 [info     ] FQE_20220420170659: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016374856653347822, 'time_algorithm_update': 0.003482050962851081, 'loss': 0.11507548442451467, 'time_step': 0.003720512524457045, 'init_value': -7.212432384490967, 'ave_value': -7.370890184841868, 'soft_opc': nan} step=3550




2022-04-20 17:07.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.15 [info     ] FQE_20220420170659: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00017046458284619828, 'time_algorithm_update': 0.003508385806016519, 'loss': 0.13031519108360082, 'time_step': 0.0037538427702138123, 'init_value': -8.02210807800293, 'ave_value': -8.202282640313014, 'soft_opc': nan} step=3905




2022-04-20 17:07.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.17 [info     ] FQE_20220420170659: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.0001697855935969823, 'time_algorithm_update': 0.0035142952287700816, 'loss': 0.147378767921891, 'time_step': 0.003758232358475806, 'init_value': -8.148343086242676, 'ave_value': -8.385005462938677, 'soft_opc': nan} step=4260




2022-04-20 17:07.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.18 [info     ] FQE_20220420170659: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017366677942410322, 'time_algorithm_update': 0.0035637902541899346, 'loss': 0.165092156958622, 'time_step': 0.0038137402333004375, 'init_value': -8.75369644165039, 'ave_value': -9.13084357828669, 'soft_opc': nan} step=4615




2022-04-20 17:07.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.19 [info     ] FQE_20220420170659: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001702577295437665, 'time_algorithm_update': 0.003513533632520219, 'loss': 0.19064755267660383, 'time_step': 0.0037568931848230495, 'init_value': -9.180045127868652, 'ave_value': -9.685017085880846, 'soft_opc': nan} step=4970




2022-04-20 17:07.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.21 [info     ] FQE_20220420170659: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016979633922308265, 'time_algorithm_update': 0.0035006523132324217, 'loss': 0.21443020636871668, 'time_step': 0.003745993761949136, 'init_value': -9.606191635131836, 'ave_value': -10.19085807870388, 'soft_opc': nan} step=5325




2022-04-20 17:07.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.22 [info     ] FQE_20220420170659: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016524220856142715, 'time_algorithm_update': 0.003430833279247015, 'loss': 0.23532061173357594, 'time_step': 0.003671555451943841, 'init_value': -9.819937705993652, 'ave_value': -10.533136132754567, 'soft_opc': nan} step=5680




2022-04-20 17:07.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.24 [info     ] FQE_20220420170659: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00017054047383053202, 'time_algorithm_update': 0.0035373566855846994, 'loss': 0.2634760744082676, 'time_step': 0.0037826155273007673, 'init_value': -10.088759422302246, 'ave_value': -10.953717565615127, 'soft_opc': nan} step=6035




2022-04-20 17:07.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.25 [info     ] FQE_20220420170659: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016630669714699328, 'time_algorithm_update': 0.0034713765265236437, 'loss': 0.29015892125456266, 'time_step': 0.0037141551434154243, 'init_value': -10.56041145324707, 'ave_value': -11.498547869097044, 'soft_opc': nan} step=6390




2022-04-20 17:07.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.27 [info     ] FQE_20220420170659: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001639648222587478, 'time_algorithm_update': 0.0034959054329025915, 'loss': 0.32434787671016135, 'time_step': 0.0037345698182011996, 'init_value': -10.990689277648926, 'ave_value': -12.021342128311, 'soft_opc': nan} step=6745




2022-04-20 17:07.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.28 [info     ] FQE_20220420170659: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.0001702577295437665, 'time_algorithm_update': 0.0035437201110410017, 'loss': 0.3497937919545761, 'time_step': 0.0037899588195370957, 'init_value': -11.180387496948242, 'ave_value': -12.30640470471468, 'soft_opc': nan} step=7100




2022-04-20 17:07.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.30 [info     ] FQE_20220420170659: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00016685203767158617, 'time_algorithm_update': 0.003495759023746974, 'loss': 0.36337067615503155, 'time_step': 0.0037362098693847655, 'init_value': -11.433145523071289, 'ave_value': -12.71132361492672, 'soft_opc': nan} step=7455




2022-04-20 17:07.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.31 [info     ] FQE_20220420170659: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00017067748056331152, 'time_algorithm_update': 0.0035606545461735256, 'loss': 0.38959562266376657, 'time_step': 0.0038065420070164642, 'init_value': -12.050572395324707, 'ave_value': -13.465796833632075, 'soft_opc': nan} step=7810




2022-04-20 17:07.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.32 [info     ] FQE_20220420170659: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001715606367084342, 'time_algorithm_update': 0.0035435293761777206, 'loss': 0.40490321870630896, 'time_step': 0.003791490742858027, 'init_value': -12.221353530883789, 'ave_value': -13.686317798974738, 'soft_opc': nan} step=8165




2022-04-20 17:07.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.34 [info     ] FQE_20220420170659: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016823083582058758, 'time_algorithm_update': 0.0035520721489275004, 'loss': 0.4295749760587031, 'time_step': 0.003793779561217402, 'init_value': -12.430234909057617, 'ave_value': -13.934872173072058, 'soft_opc': nan} step=8520




2022-04-20 17:07.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.35 [info     ] FQE_20220420170659: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.0001676257227508115, 'time_algorithm_update': 0.003548677874283052, 'loss': 0.4600049442388642, 'time_step': 0.0037920468290087205, 'init_value': -12.727653503417969, 'ave_value': -14.215699699864347, 'soft_opc': nan} step=8875




2022-04-20 17:07.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.37 [info     ] FQE_20220420170659: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017161973765198612, 'time_algorithm_update': 0.0035773888440199303, 'loss': 0.4846473086572869, 'time_step': 0.0038227799912573584, 'init_value': -13.285021781921387, 'ave_value': -14.722687287605638, 'soft_opc': nan} step=9230




2022-04-20 17:07.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.38 [info     ] FQE_20220420170659: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001661052166576117, 'time_algorithm_update': 0.0034836816116118096, 'loss': 0.4944688048948285, 'time_step': 0.0037231619928924133, 'init_value': -13.22092342376709, 'ave_value': -14.6964979857168, 'soft_opc': nan} step=9585




2022-04-20 17:07.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.40 [info     ] FQE_20220420170659: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.000168021967713262, 'time_algorithm_update': 0.0035056349257348287, 'loss': 0.5204357172664202, 'time_step': 0.0037478937229640046, 'init_value': -13.780885696411133, 'ave_value': -15.296685361513147, 'soft_opc': nan} step=9940




2022-04-20 17:07.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.41 [info     ] FQE_20220420170659: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016884132170341383, 'time_algorithm_update': 0.0035577216618497606, 'loss': 0.5330779516675942, 'time_step': 0.0038021685371936206, 'init_value': -14.325735092163086, 'ave_value': -15.949405608741396, 'soft_opc': nan} step=10295




2022-04-20 17:07.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.43 [info     ] FQE_20220420170659: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017096895567128356, 'time_algorithm_update': 0.0035139668155723894, 'loss': 0.5608220449268398, 'time_step': 0.0037595896653726067, 'init_value': -14.62497615814209, 'ave_value': -16.244701727810515, 'soft_opc': nan} step=10650




2022-04-20 17:07.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.44 [info     ] FQE_20220420170659: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016836649935010454, 'time_algorithm_update': 0.0034839959211752447, 'loss': 0.5660146297755796, 'time_step': 0.0037248893522880445, 'init_value': -15.178475379943848, 'ave_value': -16.889689592306564, 'soft_opc': nan} step=11005




2022-04-20 17:07.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.45 [info     ] FQE_20220420170659: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016870700137715944, 'time_algorithm_update': 0.003532099388015102, 'loss': 0.6059669842917315, 'time_step': 0.0037774878488460056, 'init_value': -15.086959838867188, 'ave_value': -16.830560954103063, 'soft_opc': nan} step=11360




2022-04-20 17:07.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.47 [info     ] FQE_20220420170659: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017004281702175946, 'time_algorithm_update': 0.0035208043917803696, 'loss': 0.6042602327088236, 'time_step': 0.003765335217328139, 'init_value': -15.55310344696045, 'ave_value': -17.277704243477018, 'soft_opc': nan} step=11715




2022-04-20 17:07.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.48 [info     ] FQE_20220420170659: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.0001679709259892853, 'time_algorithm_update': 0.0035784043056864136, 'loss': 0.6297650207358767, 'time_step': 0.0038210445726421516, 'init_value': -15.510528564453125, 'ave_value': -17.330681458529817, 'soft_opc': nan} step=12070




2022-04-20 17:07.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.50 [info     ] FQE_20220420170659: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016666331761319874, 'time_algorithm_update': 0.003491472862136196, 'loss': 0.6582652341836774, 'time_step': 0.0037312319580937777, 'init_value': -16.349802017211914, 'ave_value': -18.163679236936908, 'soft_opc': nan} step=12425




2022-04-20 17:07.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.51 [info     ] FQE_20220420170659: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00016833829208159112, 'time_algorithm_update': 0.0035492648541087836, 'loss': 0.681835209142784, 'time_step': 0.0037930528882523656, 'init_value': -16.688587188720703, 'ave_value': -18.436844187226995, 'soft_opc': nan} step=12780




2022-04-20 17:07.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.53 [info     ] FQE_20220420170659: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001714867605289943, 'time_algorithm_update': 0.003471271085067534, 'loss': 0.6859167777421609, 'time_step': 0.003717339878350916, 'init_value': -16.260343551635742, 'ave_value': -18.113940541390594, 'soft_opc': nan} step=13135




2022-04-20 17:07.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.54 [info     ] FQE_20220420170659: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016672913457306337, 'time_algorithm_update': 0.003538769735416896, 'loss': 0.7168638860780588, 'time_step': 0.0037819439256694953, 'init_value': -16.691448211669922, 'ave_value': -18.41308238026105, 'soft_opc': nan} step=13490




2022-04-20 17:07.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.56 [info     ] FQE_20220420170659: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017088836347553093, 'time_algorithm_update': 0.003911775938222106, 'loss': 0.7376432037007221, 'time_step': 0.0041601590707268515, 'init_value': -16.998064041137695, 'ave_value': -18.781876621988904, 'soft_opc': nan} step=13845




2022-04-20 17:07.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.58 [info     ] FQE_20220420170659: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00017673868528554138, 'time_algorithm_update': 0.005159471404384559, 'loss': 0.7576695665139968, 'time_step': 0.005414118565304178, 'init_value': -17.412738800048828, 'ave_value': -19.027727576777664, 'soft_opc': nan} step=14200




2022-04-20 17:07.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.00 [info     ] FQE_20220420170659: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016760557470187336, 'time_algorithm_update': 0.005067149014540122, 'loss': 0.7678716465830803, 'time_step': 0.005309517282835195, 'init_value': -17.89842414855957, 'ave_value': -19.480813887692328, 'soft_opc': nan} step=14555




2022-04-20 17:08.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.02 [info     ] FQE_20220420170659: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00015525683550767495, 'time_algorithm_update': 0.004939252557888837, 'loss': 0.7878463574593336, 'time_step': 0.005161397557863047, 'init_value': -18.224246978759766, 'ave_value': -19.724079810244007, 'soft_opc': nan} step=14910




2022-04-20 17:08.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.04 [info     ] FQE_20220420170659: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00014942934815312774, 'time_algorithm_update': 0.004472183845412563, 'loss': 0.8155397779085267, 'time_step': 0.004687322025567713, 'init_value': -18.51667594909668, 'ave_value': -20.010822230554457, 'soft_opc': nan} step=15265




2022-04-20 17:08.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.06 [info     ] FQE_20220420170659: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00015534414371974031, 'time_algorithm_update': 0.004952552956594548, 'loss': 0.8174381756341793, 'time_step': 0.00517487324459452, 'init_value': -19.096649169921875, 'ave_value': -20.59106396748139, 'soft_opc': nan} step=15620




2022-04-20 17:08.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.08 [info     ] FQE_20220420170659: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017332023298236685, 'time_algorithm_update': 0.005112030808354767, 'loss': 0.8355917582052275, 'time_step': 0.005361198371564838, 'init_value': -18.630327224731445, 'ave_value': -20.226317222836396, 'soft_opc': nan} step=15975




2022-04-20 17:08.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.10 [info     ] FQE_20220420170659: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00016944979278134628, 'time_algorithm_update': 0.005025791786086391, 'loss': 0.8313768880520488, 'time_step': 0.00527312050403004, 'init_value': -18.97324562072754, 'ave_value': -20.488334591132485, 'soft_opc': nan} step=16330




2022-04-20 17:08.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.12 [info     ] FQE_20220420170659: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.0001719709853051414, 'time_algorithm_update': 0.005033570947781415, 'loss': 0.8298141455461442, 'time_step': 0.005279240809695822, 'init_value': -18.651296615600586, 'ave_value': -20.143663536976817, 'soft_opc': nan} step=16685




2022-04-20 17:08.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.14 [info     ] FQE_20220420170659: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00017324770000618948, 'time_algorithm_update': 0.004712789159425548, 'loss': 0.8360621474263533, 'time_step': 0.004959173605475627, 'init_value': -19.164180755615234, 'ave_value': -20.755814669126863, 'soft_opc': nan} step=17040




2022-04-20 17:08.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.16 [info     ] FQE_20220420170659: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.0001722228359168684, 'time_algorithm_update': 0.005113183948355662, 'loss': 0.8480948238620456, 'time_step': 0.005362837751146773, 'init_value': -19.280969619750977, 'ave_value': -20.890027552752, 'soft_opc': nan} step=17395




2022-04-20 17:08.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:08.18 [info     ] FQE_20220420170659: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017288100551551497, 'time_algorithm_update': 0.005086481739097918, 'loss': 0.864506710331205, 'time_step': 0.005336203373653788, 'init_value': -19.480710983276367, 'ave_value': -21.180229642126168, 'soft_opc': nan} step=17750




2022-04-20 17:08.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170659/model_17750.pt
search iteration:  20
using hyper params:  [0.0005514049233496806, 0.005910429511514047, 9.187020664674929e-05, 1]
2022-04-20 17:08.18 [debug    ] RoundIterator is selected.
2022-04-20 17:08.18 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420170818
2022-04-20 17:08.18 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:08.18 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:08.18 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:08.18 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0005514049233

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.21 [info     ] TD3PlusBC_20220420170818: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003152576803463941, 'time_algorithm_update': 0.00835370111186602, 'critic_loss': 0.37642113077971673, 'actor_loss': 0.0574132118221612, 'time_step': 0.008748229484111941, 'td_error': 0.8128131184629305, 'init_value': -0.43377357721328735, 'ave_value': 0.2234041058233318} step=342
2022-04-20 17:08.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.25 [info     ] TD3PlusBC_20220420170818: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00032063813237418907, 'time_algorithm_update': 0.008860465378789177, 'critic_loss': 0.18547745530455434, 'actor_loss': -0.026005099543876815, 'time_step': 0.009260482258266874, 'td_error': 0.8040779535935992, 'init_value': -0.6593672633171082, 'ave_value': 0.31970954560053844} step=684
2022-04-20 17:08.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.29 [info     ] TD3PlusBC_20220420170818: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003246382663124486, 'time_algorithm_update': 0.008803810989647581, 'critic_loss': 0.22161038971522398, 'actor_loss': -0.02428431469097472, 'time_step': 0.009207636989348116, 'td_error': 0.7976442680089492, 'init_value': -0.939776599407196, 'ave_value': 0.33014347008777667} step=1026
2022-04-20 17:08.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.32 [info     ] TD3PlusBC_20220420170818: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00031666309512846653, 'time_algorithm_update': 0.008450191620497675, 'critic_loss': 0.2899819819463624, 'actor_loss': -0.029079917687595935, 'time_step': 0.00884729728364108, 'td_error': 0.7991328629087798, 'init_value': -1.2725590467453003, 'ave_value': 0.36945204154846284} step=1368
2022-04-20 17:08.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.36 [info     ] TD3PlusBC_20220420170818: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003212829779463205, 'time_algorithm_update': 0.008731701220685279, 'critic_loss': 0.349524205822868, 'actor_loss': -0.01539486178267769, 'time_step': 0.009129613463641608, 'td_error': 0.8052302894447896, 'init_value': -1.432041883468628, 'ave_value': 0.512777081081048} step=1710
2022-04-20 17:08.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.40 [info     ] TD3PlusBC_20220420170818: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003201668722587719, 'time_algorithm_update': 0.008535576842681707, 'critic_loss': 0.4256362961877508, 'actor_loss': 0.007042496612197475, 'time_step': 0.008935524706254926, 'td_error': 0.8100082026200565, 'init_value': -1.83623468875885, 'ave_value': 0.5070181917489783} step=2052
2022-04-20 17:08.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.43 [info     ] TD3PlusBC_20220420170818: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003239481072676809, 'time_algorithm_update': 0.008907019046314975, 'critic_loss': 0.4935475441011769, 'actor_loss': 0.010685394229421838, 'time_step': 0.009310595473350837, 'td_error': 0.8205199302081633, 'init_value': -2.189471960067749, 'ave_value': 0.5447538195541213} step=2394
2022-04-20 17:08.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.47 [info     ] TD3PlusBC_20220420170818: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00028846905245418435, 'time_algorithm_update': 0.00828745490626285, 'critic_loss': 0.5724855095845217, 'actor_loss': 0.010534320484127915, 'time_step': 0.008645138545343054, 'td_error': 0.8304746233496728, 'init_value': -2.423372983932495, 'ave_value': 0.6282250910330363} step=2736
2022-04-20 17:08.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.50 [info     ] TD3PlusBC_20220420170818: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003005816922550313, 'time_algorithm_update': 0.008139511298017892, 'critic_loss': 0.6461437552383071, 'actor_loss': 0.025130244919605423, 'time_step': 0.008515722570363541, 'td_error': 0.8446405956584446, 'init_value': -2.7431788444519043, 'ave_value': 0.6857281060557947} step=3078
2022-04-20 17:08.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.54 [info     ] TD3PlusBC_20220420170818: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00032569720731144065, 'time_algorithm_update': 0.008822758295382673, 'critic_loss': 0.7231311146714534, 'actor_loss': 0.041696656233908835, 'time_step': 0.009228212094446372, 'td_error': 0.863910340020363, 'init_value': -3.0046234130859375, 'ave_value': 0.7636459444937305} step=3420
2022-04-20 17:08.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.57 [info     ] TD3PlusBC_20220420170818: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003158844005294711, 'time_algorithm_update': 0.008759691701297872, 'critic_loss': 0.8448984618598258, 'actor_loss': 0.046690073604995046, 'time_step': 0.009154982037014432, 'td_error': 0.8785719907977086, 'init_value': -3.4241766929626465, 'ave_value': 0.765171151378041} step=3762
2022-04-20 17:08.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.01 [info     ] TD3PlusBC_20220420170818: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00032815389465867427, 'time_algorithm_update': 0.008947134715074684, 'critic_loss': 0.9268524478576337, 'actor_loss': 0.04306602608739284, 'time_step': 0.009354333431400054, 'td_error': 0.8970148524974315, 'init_value': -3.67736554145813, 'ave_value': 0.8627991786557208} step=4104
2022-04-20 17:09.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.05 [info     ] TD3PlusBC_20220420170818: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003257676174766139, 'time_algorithm_update': 0.008841908466049105, 'critic_loss': 1.0415202246771917, 'actor_loss': 0.03916579665757759, 'time_step': 0.009248428874545626, 'td_error': 0.9162781639775709, 'init_value': -4.0082268714904785, 'ave_value': 0.9023607616488158} step=4446
2022-04-20 17:09.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.08 [info     ] TD3PlusBC_20220420170818: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003211679514388592, 'time_algorithm_update': 0.008329050582751893, 'critic_loss': 1.2065895348787308, 'actor_loss': 0.05239348422888427, 'time_step': 0.008730161956876342, 'td_error': 0.9439576936661566, 'init_value': -4.378540992736816, 'ave_value': 0.9139311765741372} step=4788
2022-04-20 17:09.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.12 [info     ] TD3PlusBC_20220420170818: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00031938817765977647, 'time_algorithm_update': 0.008844894972461008, 'critic_loss': 1.2714294974218334, 'actor_loss': 0.050795105879592614, 'time_step': 0.009242438433463113, 'td_error': 0.9538138111326522, 'init_value': -4.764053821563721, 'ave_value': 0.9992412074363731} step=5130
2022-04-20 17:09.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.16 [info     ] TD3PlusBC_20220420170818: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00032012783295926994, 'time_algorithm_update': 0.008790675659625852, 'critic_loss': 1.4620596359522022, 'actor_loss': 0.060072754482515374, 'time_step': 0.00919104598419011, 'td_error': 0.976061672563456, 'init_value': -5.149111747741699, 'ave_value': 0.9836228413954126} step=5472
2022-04-20 17:09.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.19 [info     ] TD3PlusBC_20220420170818: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003264466224358096, 'time_algorithm_update': 0.00870275288297419, 'critic_loss': 1.5687334382394602, 'actor_loss': 0.04801904632334124, 'time_step': 0.009110238817003038, 'td_error': 1.0102869373773466, 'init_value': -5.356501579284668, 'ave_value': 1.1349614302251647} step=5814
2022-04-20 17:09.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.23 [info     ] TD3PlusBC_20220420170818: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00032441936738309806, 'time_algorithm_update': 0.008800017206292404, 'critic_loss': 1.6662196119190658, 'actor_loss': 0.03993683935780274, 'time_step': 0.009204163188822785, 'td_error': 1.053158687782042, 'init_value': -5.727189540863037, 'ave_value': 1.1242163131668794} step=6156
2022-04-20 17:09.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.26 [info     ] TD3PlusBC_20220420170818: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00031811173199213037, 'time_algorithm_update': 0.008331208201179727, 'critic_loss': 1.9784558298643569, 'actor_loss': 0.05395017363871747, 'time_step': 0.008727841907077365, 'td_error': 1.0793751289232432, 'init_value': -6.062619209289551, 'ave_value': 1.1920323477666992} step=6498
2022-04-20 17:09.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.30 [info     ] TD3PlusBC_20220420170818: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00031929755071450396, 'time_algorithm_update': 0.008804264124373944, 'critic_loss': 2.113965539730083, 'actor_loss': 0.06286093488074186, 'time_step': 0.00920667146381579, 'td_error': 1.1168591398823862, 'init_value': -6.301658630371094, 'ave_value': 1.3057963501276062} step=6840
2022-04-20 17:09.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.34 [info     ] TD3PlusBC_20220420170818: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.000320465244047823, 'time_algorithm_update': 0.00877146483861912, 'critic_loss': 2.2408876077473514, 'actor_loss': 0.06295745872091829, 'time_step': 0.009170394194753547, 'td_error': 1.1239739484131581, 'init_value': -6.780591011047363, 'ave_value': 1.279713920324722} step=7182
2022-04-20 17:09.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.37 [info     ] TD3PlusBC_20220420170818: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003218323166607416, 'time_algorithm_update': 0.008419498365524917, 'critic_loss': 2.395993530837416, 'actor_loss': 0.05150428388202399, 'time_step': 0.008820279299864295, 'td_error': 1.1746633391208088, 'init_value': -7.105137825012207, 'ave_value': 1.3561230082628695} step=7524
2022-04-20 17:09.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.41 [info     ] TD3PlusBC_20220420170818: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003280263198049445, 'time_algorithm_update': 0.008753793281421327, 'critic_loss': 2.601466122600767, 'actor_loss': 0.05032379901287151, 'time_step': 0.009162497102168569, 'td_error': 1.1982691335787907, 'init_value': -7.267415523529053, 'ave_value': 1.492881970643695} step=7866
2022-04-20 17:09.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.44 [info     ] TD3PlusBC_20220420170818: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00032385608606171187, 'time_algorithm_update': 0.00838206316295423, 'critic_loss': 2.7785942845525797, 'actor_loss': 0.07537652776516669, 'time_step': 0.008785161358571192, 'td_error': 1.2099965406527875, 'init_value': -7.967588901519775, 'ave_value': 1.4380689847549877} step=8208
2022-04-20 17:09.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.48 [info     ] TD3PlusBC_20220420170818: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032178979170949837, 'time_algorithm_update': 0.008886488557559007, 'critic_loss': 3.000528294789164, 'actor_loss': 0.06737026243269095, 'time_step': 0.0092878418359143, 'td_error': 1.2797457536532375, 'init_value': -8.069253921508789, 'ave_value': 1.4722734060853317} step=8550
2022-04-20 17:09.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.52 [info     ] TD3PlusBC_20220420170818: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003254546059502496, 'time_algorithm_update': 0.008848933448568422, 'critic_loss': 3.1908064122088473, 'actor_loss': 0.05902158680279353, 'time_step': 0.009252321650410256, 'td_error': 1.3022938227318592, 'init_value': -8.524003982543945, 'ave_value': 1.506372273014271} step=8892
2022-04-20 17:09.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.55 [info     ] TD3PlusBC_20220420170818: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00032274834593834235, 'time_algorithm_update': 0.008525637158176355, 'critic_loss': 3.5001511481421734, 'actor_loss': 0.0782664664176821, 'time_step': 0.008928993989152518, 'td_error': 1.3419369954753653, 'init_value': -8.984109878540039, 'ave_value': 1.531255001527853} step=9234
2022-04-20 17:09.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.59 [info     ] TD3PlusBC_20220420170818: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003247965149015014, 'time_algorithm_update': 0.008835805786980523, 'critic_loss': 3.5520993831910586, 'actor_loss': 0.07025935548787926, 'time_step': 0.009238640467325846, 'td_error': 1.36518530967324, 'init_value': -9.302104949951172, 'ave_value': 1.6387394749692448} step=9576
2022-04-20 17:09.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.03 [info     ] TD3PlusBC_20220420170818: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00032574600643581815, 'time_algorithm_update': 0.008383699327881574, 'critic_loss': 3.828919134990514, 'actor_loss': 0.08633196345198224, 'time_step': 0.0087880063475224, 'td_error': 1.4338080524853118, 'init_value': -9.47611141204834, 'ave_value': 1.6759657923110367} step=9918
2022-04-20 17:10.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.06 [info     ] TD3PlusBC_20220420170818: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00032319799501296374, 'time_algorithm_update': 0.008773805105198196, 'critic_loss': 3.9460135034301826, 'actor_loss': 0.06822446624786534, 'time_step': 0.009175842268425122, 'td_error': 1.4227345731605843, 'init_value': -9.952404975891113, 'ave_value': 1.8331277498760605} step=10260
2022-04-20 17:10.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.10 [info     ] TD3PlusBC_20220420170818: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003196677269294248, 'time_algorithm_update': 0.008872899395680567, 'critic_loss': 4.251862063393955, 'actor_loss': 0.07803328601066133, 'time_step': 0.00927125640779908, 'td_error': 1.5050542549430561, 'init_value': -10.137758255004883, 'ave_value': 1.8046131412003625} step=10602
2022-04-20 17:10.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.14 [info     ] TD3PlusBC_20220420170818: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003266522758885434, 'time_algorithm_update': 0.008433054065146642, 'critic_loss': 4.444463101918237, 'actor_loss': 0.07306530200249968, 'time_step': 0.008840047128019278, 'td_error': 1.529348771858351, 'init_value': -10.494142532348633, 'ave_value': 1.8314024325887261} step=10944
2022-04-20 17:10.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.17 [info     ] TD3PlusBC_20220420170818: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003258533645094487, 'time_algorithm_update': 0.008910961318434331, 'critic_loss': 4.64761979555526, 'actor_loss': 0.0737188927044994, 'time_step': 0.009319055150126853, 'td_error': 1.5700611380872012, 'init_value': -11.342584609985352, 'ave_value': 1.6601375760047419} step=11286
2022-04-20 17:10.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.21 [info     ] TD3PlusBC_20220420170818: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003237926472000211, 'time_algorithm_update': 0.008530945108647933, 'critic_loss': 4.911155998358253, 'actor_loss': 0.07952239007106301, 'time_step': 0.00893353927902311, 'td_error': 1.6199939356283943, 'init_value': -11.340459823608398, 'ave_value': 1.8655200396708909} step=11628
2022-04-20 17:10.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.25 [info     ] TD3PlusBC_20220420170818: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00032079080392045586, 'time_algorithm_update': 0.008767914353755483, 'critic_loss': 5.028141353736844, 'actor_loss': 0.054310537003285704, 'time_step': 0.009169918054725692, 'td_error': 1.7057381115765213, 'init_value': -11.406274795532227, 'ave_value': 1.9349580971482168} step=11970
2022-04-20 17:10.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.28 [info     ] TD3PlusBC_20220420170818: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00032744770161589686, 'time_algorithm_update': 0.008768470663773386, 'critic_loss': 5.383203640841601, 'actor_loss': 0.0750539985142256, 'time_step': 0.009174756836472895, 'td_error': 1.7292018073306499, 'init_value': -11.866799354553223, 'ave_value': 1.9896843796754509} step=12312
2022-04-20 17:10.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.32 [info     ] TD3PlusBC_20220420170818: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003198705918607656, 'time_algorithm_update': 0.008477647402133161, 'critic_loss': 5.640256216651515, 'actor_loss': 0.07146352607952922, 'time_step': 0.00887706544664171, 'td_error': 1.7764082910590524, 'init_value': -12.354130744934082, 'ave_value': 1.9603244748407858} step=12654
2022-04-20 17:10.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.35 [info     ] TD3PlusBC_20220420170818: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033138160817107264, 'time_algorithm_update': 0.00888114853909141, 'critic_loss': 5.943519574159767, 'actor_loss': 0.07522087727199521, 'time_step': 0.009292553042807775, 'td_error': 1.8117218002994813, 'init_value': -12.700908660888672, 'ave_value': 2.0462887159885152} step=12996
2022-04-20 17:10.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.39 [info     ] TD3PlusBC_20220420170818: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00032304950624878644, 'time_algorithm_update': 0.00880457225598787, 'critic_loss': 6.124481558276896, 'actor_loss': 0.072385189194254, 'time_step': 0.009207469678064536, 'td_error': 1.8716119958524382, 'init_value': -13.201202392578125, 'ave_value': 2.0438169874801293} step=13338
2022-04-20 17:10.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.43 [info     ] TD3PlusBC_20220420170818: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.000325174359550253, 'time_algorithm_update': 0.008792955972994978, 'critic_loss': 6.361133781790036, 'actor_loss': 0.08457181552610202, 'time_step': 0.009196405522307457, 'td_error': 1.926109062224373, 'init_value': -13.342663764953613, 'ave_value': 2.0844485367544263} step=13680
2022-04-20 17:10.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.46 [info     ] TD3PlusBC_20220420170818: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003245685532776236, 'time_algorithm_update': 0.008739661752131948, 'critic_loss': 6.625157863598818, 'actor_loss': 0.050911849397316314, 'time_step': 0.009144077524107102, 'td_error': 1.9787190348053858, 'init_value': -13.51368522644043, 'ave_value': 2.2850225996757176} step=14022
2022-04-20 17:10.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.50 [info     ] TD3PlusBC_20220420170818: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00031774155577720954, 'time_algorithm_update': 0.008276019877160501, 'critic_loss': 7.039515276052798, 'actor_loss': 0.078069633842386, 'time_step': 0.008672309200666105, 'td_error': 2.0369017228381807, 'init_value': -14.17755126953125, 'ave_value': 2.2182494713757857} step=14364
2022-04-20 17:10.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.54 [info     ] TD3PlusBC_20220420170818: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032540720108656856, 'time_algorithm_update': 0.008847954677559479, 'critic_loss': 7.098204526984901, 'actor_loss': 0.07767647689981767, 'time_step': 0.009253353403325667, 'td_error': 2.096724465811422, 'init_value': -14.73780345916748, 'ave_value': 2.159068354069851} step=14706
2022-04-20 17:10.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.57 [info     ] TD3PlusBC_20220420170818: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032939199815716657, 'time_algorithm_update': 0.008949014178493567, 'critic_loss': 7.48482824975287, 'actor_loss': 0.07295554259802862, 'time_step': 0.009360371974476596, 'td_error': 2.1654314554607796, 'init_value': -15.385854721069336, 'ave_value': 2.171157478202923} step=15048
2022-04-20 17:10.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.01 [info     ] TD3PlusBC_20220420170818: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00031971861744484707, 'time_algorithm_update': 0.008344765992192497, 'critic_loss': 7.733330082126528, 'actor_loss': 0.0847750232930769, 'time_step': 0.008744090621234381, 'td_error': 2.216535221998582, 'init_value': -15.332110404968262, 'ave_value': 2.289818731471225} step=15390
2022-04-20 17:11.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.05 [info     ] TD3PlusBC_20220420170818: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003218072199682046, 'time_algorithm_update': 0.008943078113578216, 'critic_loss': 7.980700810053195, 'actor_loss': 0.09102005646591298, 'time_step': 0.009346528360021044, 'td_error': 2.2740641923481366, 'init_value': -15.800585746765137, 'ave_value': 2.201683027656531} step=15732
2022-04-20 17:11.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.08 [info     ] TD3PlusBC_20220420170818: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003208932820816486, 'time_algorithm_update': 0.00841115441238671, 'critic_loss': 8.335983193757242, 'actor_loss': 0.08233571120085773, 'time_step': 0.008811367882622613, 'td_error': 2.3341583674052315, 'init_value': -16.048341751098633, 'ave_value': 2.363363352395544} step=16074
2022-04-20 17:11.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.12 [info     ] TD3PlusBC_20220420170818: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00032387212005972166, 'time_algorithm_update': 0.008894651953937018, 'critic_loss': 8.60811125744156, 'actor_loss': 0.08936798130298218, 'time_step': 0.009297573775575873, 'td_error': 2.436051862911767, 'init_value': -16.952661514282227, 'ave_value': 2.144375977215382} step=16416
2022-04-20 17:11.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.15 [info     ] TD3PlusBC_20220420170818: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003251332288597062, 'time_algorithm_update': 0.008846206274646067, 'critic_loss': 8.988944165887888, 'actor_loss': 0.10483572274795053, 'time_step': 0.00925091762988888, 'td_error': 2.455008772729847, 'init_value': -16.479045867919922, 'ave_value': 2.6082328678608815} step=16758
2022-04-20 17:11.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.19 [info     ] TD3PlusBC_20220420170818: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032513044033831326, 'time_algorithm_update': 0.008467948924728304, 'critic_loss': 9.254162927817182, 'actor_loss': 0.0783576914790081, 'time_step': 0.008873339982060661, 'td_error': 2.511793513423355, 'init_value': -17.491153717041016, 'ave_value': 2.272407662263811} step=17100
2022-04-20 17:11.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420170818/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:11.20 [info     ] FQE_20220420171119: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016022015766925122, 'time_algorithm_update': 0.00510838376470359, 'loss': 0.007921807675521416, 'time_step': 0.005343968609729445, 'init_value': -0.30786219239234924, 'ave_value': -0.298303924781178, 'soft_opc': nan} step=166




2022-04-20 17:11.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.21 [info     ] FQE_20220420171119: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016437524772552122, 'time_algorithm_update': 0.005153047033103116, 'loss': 0.006219832533802164, 'time_step': 0.005391336349119623, 'init_value': -0.49318355321884155, 'ave_value': -0.42383148104690754, 'soft_opc': nan} step=332




2022-04-20 17:11.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.22 [info     ] FQE_20220420171119: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001650287444332996, 'time_algorithm_update': 0.005049209996878383, 'loss': 0.005328650754618357, 'time_step': 0.005283072770360005, 'init_value': -0.5430331230163574, 'ave_value': -0.43445590301710474, 'soft_opc': nan} step=498




2022-04-20 17:11.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.23 [info     ] FQE_20220420171119: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001636183405496988, 'time_algorithm_update': 0.005025181425623147, 'loss': 0.0050870653519586445, 'time_step': 0.0052614168948437795, 'init_value': -0.631019115447998, 'ave_value': -0.4719206760931122, 'soft_opc': nan} step=664




2022-04-20 17:11.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.24 [info     ] FQE_20220420171119: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016154869493231717, 'time_algorithm_update': 0.0050060404352394935, 'loss': 0.004916234741391367, 'time_step': 0.005234010248299104, 'init_value': -0.7199161648750305, 'ave_value': -0.5182991113446586, 'soft_opc': nan} step=830




2022-04-20 17:11.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.25 [info     ] FQE_20220420171119: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001690143562224974, 'time_algorithm_update': 0.00446981550699257, 'loss': 0.004500744559981647, 'time_step': 0.004712634776012007, 'init_value': -0.7932524681091309, 'ave_value': -0.5651467474429188, 'soft_opc': nan} step=996




2022-04-20 17:11.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.26 [info     ] FQE_20220420171119: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015762915094214748, 'time_algorithm_update': 0.004979061793132001, 'loss': 0.004391921057186303, 'time_step': 0.005208709153784327, 'init_value': -0.8261725902557373, 'ave_value': -0.5569421654885953, 'soft_opc': nan} step=1162




2022-04-20 17:11.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.27 [info     ] FQE_20220420171119: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016103020633559628, 'time_algorithm_update': 0.005049023283533303, 'loss': 0.004327992015204067, 'time_step': 0.005281145314136183, 'init_value': -0.9027296304702759, 'ave_value': -0.5916741498385195, 'soft_opc': nan} step=1328




2022-04-20 17:11.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.28 [info     ] FQE_20220420171119: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001641526279679264, 'time_algorithm_update': 0.005130074110375829, 'loss': 0.00413745707209525, 'time_step': 0.005366313888365964, 'init_value': -0.947917640209198, 'ave_value': -0.6151929338168991, 'soft_opc': nan} step=1494




2022-04-20 17:11.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.29 [info     ] FQE_20220420171119: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016551994415650885, 'time_algorithm_update': 0.0051126997154879285, 'loss': 0.004168093499744378, 'time_step': 0.005349703581936388, 'init_value': -1.0420094728469849, 'ave_value': -0.6803619648757818, 'soft_opc': nan} step=1660




2022-04-20 17:11.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.30 [info     ] FQE_20220420171119: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016372175101774284, 'time_algorithm_update': 0.005015213805508901, 'loss': 0.004201666740223721, 'time_step': 0.00525362520332796, 'init_value': -1.102413296699524, 'ave_value': -0.7195657155602365, 'soft_opc': nan} step=1826




2022-04-20 17:11.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.31 [info     ] FQE_20220420171119: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016450307455407568, 'time_algorithm_update': 0.005146063953997141, 'loss': 0.004174513517912612, 'time_step': 0.005385292581765048, 'init_value': -1.164851188659668, 'ave_value': -0.7582385916632038, 'soft_opc': nan} step=1992




2022-04-20 17:11.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.32 [info     ] FQE_20220420171119: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015926073832684253, 'time_algorithm_update': 0.004920991070299263, 'loss': 0.00469668912392738, 'time_step': 0.0051538786256169694, 'init_value': -1.2359672784805298, 'ave_value': -0.7972695906923429, 'soft_opc': nan} step=2158




2022-04-20 17:11.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.33 [info     ] FQE_20220420171119: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001597318304590432, 'time_algorithm_update': 0.005070437867957425, 'loss': 0.004624683651175484, 'time_step': 0.005303384309791657, 'init_value': -1.283431887626648, 'ave_value': -0.8136163551763103, 'soft_opc': nan} step=2324




2022-04-20 17:11.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.33 [info     ] FQE_20220420171119: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001706028559121741, 'time_algorithm_update': 0.004743601902421698, 'loss': 0.005108585079755993, 'time_step': 0.004987669278340167, 'init_value': -1.35616135597229, 'ave_value': -0.8697570766951587, 'soft_opc': nan} step=2490




2022-04-20 17:11.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.34 [info     ] FQE_20220420171119: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001589533794357116, 'time_algorithm_update': 0.004565343799361263, 'loss': 0.005682044887651566, 'time_step': 0.0047948518431330306, 'init_value': -1.4419357776641846, 'ave_value': -0.9125205754294051, 'soft_opc': nan} step=2656




2022-04-20 17:11.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.35 [info     ] FQE_20220420171119: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001629748976374247, 'time_algorithm_update': 0.005096938236650214, 'loss': 0.005811902813624635, 'time_step': 0.005333360419215926, 'init_value': -1.5542054176330566, 'ave_value': -0.9749131494579283, 'soft_opc': nan} step=2822




2022-04-20 17:11.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.36 [info     ] FQE_20220420171119: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016149411718529392, 'time_algorithm_update': 0.005083611212581037, 'loss': 0.006264065265380609, 'time_step': 0.0053216262036059276, 'init_value': -1.59666109085083, 'ave_value': -0.9874758642351439, 'soft_opc': nan} step=2988




2022-04-20 17:11.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.37 [info     ] FQE_20220420171119: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016233863600765365, 'time_algorithm_update': 0.005104752908270043, 'loss': 0.0069996176676050455, 'time_step': 0.005338647279394679, 'init_value': -1.667741060256958, 'ave_value': -1.0192389848935712, 'soft_opc': nan} step=3154




2022-04-20 17:11.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.38 [info     ] FQE_20220420171119: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016844559864825513, 'time_algorithm_update': 0.004960713616336684, 'loss': 0.007242475012850286, 'time_step': 0.00520139142691371, 'init_value': -1.7902958393096924, 'ave_value': -1.0911687997916528, 'soft_opc': nan} step=3320




2022-04-20 17:11.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.39 [info     ] FQE_20220420171119: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001621461776365717, 'time_algorithm_update': 0.005043558327548475, 'loss': 0.0076088985408577755, 'time_step': 0.005280187331050275, 'init_value': -1.8519482612609863, 'ave_value': -1.1356120493900668, 'soft_opc': nan} step=3486




2022-04-20 17:11.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.40 [info     ] FQE_20220420171119: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001650172543812947, 'time_algorithm_update': 0.005029368113322431, 'loss': 0.008853298473082125, 'time_step': 0.005265553313565542, 'init_value': -1.9537813663482666, 'ave_value': -1.1901140864367958, 'soft_opc': nan} step=3652




2022-04-20 17:11.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.41 [info     ] FQE_20220420171119: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001624391739626965, 'time_algorithm_update': 0.005008713308587132, 'loss': 0.009579249228496688, 'time_step': 0.005244174635553935, 'init_value': -2.0874581336975098, 'ave_value': -1.2946797637596121, 'soft_opc': nan} step=3818




2022-04-20 17:11.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.42 [info     ] FQE_20220420171119: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001666445329964879, 'time_algorithm_update': 0.0050343361245580465, 'loss': 0.009726865612621228, 'time_step': 0.005274057388305664, 'init_value': -2.1270089149475098, 'ave_value': -1.2971693600041305, 'soft_opc': nan} step=3984




2022-04-20 17:11.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.43 [info     ] FQE_20220420171119: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016389697431081748, 'time_algorithm_update': 0.004368807896074042, 'loss': 0.010725376913632569, 'time_step': 0.0046028918530567585, 'init_value': -2.189199209213257, 'ave_value': -1.310316317348934, 'soft_opc': nan} step=4150




2022-04-20 17:11.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.44 [info     ] FQE_20220420171119: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001678696597915098, 'time_algorithm_update': 0.005063441862542945, 'loss': 0.011251237765178982, 'time_step': 0.005308198641581708, 'init_value': -2.3158702850341797, 'ave_value': -1.4239881545296862, 'soft_opc': nan} step=4316




2022-04-20 17:11.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.45 [info     ] FQE_20220420171119: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001640305461653744, 'time_algorithm_update': 0.005033211535718067, 'loss': 0.01166359392199553, 'time_step': 0.0052734153816498906, 'init_value': -2.3966526985168457, 'ave_value': -1.4969268760752974, 'soft_opc': nan} step=4482




2022-04-20 17:11.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.46 [info     ] FQE_20220420171119: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016334688807108315, 'time_algorithm_update': 0.005133044288819094, 'loss': 0.011892261075456234, 'time_step': 0.0053707375583878485, 'init_value': -2.4946980476379395, 'ave_value': -1.541659781358599, 'soft_opc': nan} step=4648




2022-04-20 17:11.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.47 [info     ] FQE_20220420171119: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016535908342844033, 'time_algorithm_update': 0.005065205585525696, 'loss': 0.012738972592324647, 'time_step': 0.005306321454335408, 'init_value': -2.581883668899536, 'ave_value': -1.610834211520515, 'soft_opc': nan} step=4814




2022-04-20 17:11.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.48 [info     ] FQE_20220420171119: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016712424266769225, 'time_algorithm_update': 0.005150239151644419, 'loss': 0.01312284422645637, 'time_step': 0.005388870296708073, 'init_value': -2.696303129196167, 'ave_value': -1.689384535129543, 'soft_opc': nan} step=4980




2022-04-20 17:11.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.49 [info     ] FQE_20220420171119: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016688007906258824, 'time_algorithm_update': 0.00510310264955084, 'loss': 0.014491385262016297, 'time_step': 0.005339677075305617, 'init_value': -2.838975429534912, 'ave_value': -1.7986774377927588, 'soft_opc': nan} step=5146




2022-04-20 17:11.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.50 [info     ] FQE_20220420171119: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016249231545321913, 'time_algorithm_update': 0.00494896503816168, 'loss': 0.015403461866978422, 'time_step': 0.005184822771922651, 'init_value': -2.844339370727539, 'ave_value': -1.7609048153164688, 'soft_opc': nan} step=5312




2022-04-20 17:11.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.51 [info     ] FQE_20220420171119: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016118532203766237, 'time_algorithm_update': 0.005092169865068183, 'loss': 0.015319207282063652, 'time_step': 0.005324533186763166, 'init_value': -2.8899011611938477, 'ave_value': -1.7708106685463372, 'soft_opc': nan} step=5478




2022-04-20 17:11.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.51 [info     ] FQE_20220420171119: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015752574047410344, 'time_algorithm_update': 0.004206148974866752, 'loss': 0.016528877757871187, 'time_step': 0.004434006759919316, 'init_value': -2.986827850341797, 'ave_value': -1.8050838744771238, 'soft_opc': nan} step=5644




2022-04-20 17:11.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.52 [info     ] FQE_20220420171119: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001655529780560229, 'time_algorithm_update': 0.005086592881076307, 'loss': 0.016904386127204914, 'time_step': 0.005325858851513231, 'init_value': -3.161752223968506, 'ave_value': -1.9637728128406042, 'soft_opc': nan} step=5810




2022-04-20 17:11.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.53 [info     ] FQE_20220420171119: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001655041453350021, 'time_algorithm_update': 0.005144002925918763, 'loss': 0.017865740586807167, 'time_step': 0.00538492059133139, 'init_value': -3.164541244506836, 'ave_value': -1.9228276776552604, 'soft_opc': nan} step=5976




2022-04-20 17:11.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.54 [info     ] FQE_20220420171119: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016630126769284168, 'time_algorithm_update': 0.005039615803454296, 'loss': 0.01840070171172293, 'time_step': 0.0052807776324720265, 'init_value': -3.2377140522003174, 'ave_value': -1.9669347076827812, 'soft_opc': nan} step=6142




2022-04-20 17:11.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.55 [info     ] FQE_20220420171119: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016340146581810642, 'time_algorithm_update': 0.004977147263216685, 'loss': 0.01874620000150122, 'time_step': 0.0052167134112622365, 'init_value': -3.325441837310791, 'ave_value': -2.0195136002373992, 'soft_opc': nan} step=6308




2022-04-20 17:11.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.56 [info     ] FQE_20220420171119: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016393718949283463, 'time_algorithm_update': 0.0049806445477956745, 'loss': 0.019230162670151388, 'time_step': 0.005214156874691148, 'init_value': -3.343514919281006, 'ave_value': -2.0326239816911587, 'soft_opc': nan} step=6474




2022-04-20 17:11.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.57 [info     ] FQE_20220420171119: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016471420425966563, 'time_algorithm_update': 0.00503875692206693, 'loss': 0.01986760345108089, 'time_step': 0.005278590213821595, 'init_value': -3.414844036102295, 'ave_value': -2.0765822519992923, 'soft_opc': nan} step=6640




2022-04-20 17:11.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.58 [info     ] FQE_20220420171119: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001692671373666051, 'time_algorithm_update': 0.004993384142956102, 'loss': 0.021239979234692383, 'time_step': 0.005237465881439577, 'init_value': -3.5281834602355957, 'ave_value': -2.1445476687345484, 'soft_opc': nan} step=6806




2022-04-20 17:11.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:11.59 [info     ] FQE_20220420171119: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016606428537024073, 'time_algorithm_update': 0.005056952855673181, 'loss': 0.022226669848055006, 'time_step': 0.005297341978693583, 'init_value': -3.5618271827697754, 'ave_value': -2.1520585038816673, 'soft_opc': nan} step=6972




2022-04-20 17:11.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.00 [info     ] FQE_20220420171119: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016144097569477126, 'time_algorithm_update': 0.004092120262513678, 'loss': 0.02268524440594511, 'time_step': 0.00433070257485631, 'init_value': -3.614840507507324, 'ave_value': -2.1604745461462853, 'soft_opc': nan} step=7138




2022-04-20 17:12.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.01 [info     ] FQE_20220420171119: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016641473195639, 'time_algorithm_update': 0.00503800575991711, 'loss': 0.02293334609032216, 'time_step': 0.005278838686196201, 'init_value': -3.729321002960205, 'ave_value': -2.285147723926416, 'soft_opc': nan} step=7304




2022-04-20 17:12.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.02 [info     ] FQE_20220420171119: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016789407615202018, 'time_algorithm_update': 0.005103812160262142, 'loss': 0.02404558037671384, 'time_step': 0.005344119416662009, 'init_value': -3.6966629028320312, 'ave_value': -2.226710333701995, 'soft_opc': nan} step=7470




2022-04-20 17:12.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.03 [info     ] FQE_20220420171119: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016325927642454584, 'time_algorithm_update': 0.005000034010553935, 'loss': 0.024298621335975736, 'time_step': 0.005235745246151844, 'init_value': -3.7026572227478027, 'ave_value': -2.23197081764774, 'soft_opc': nan} step=7636




2022-04-20 17:12.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.04 [info     ] FQE_20220420171119: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016448009445006588, 'time_algorithm_update': 0.005006508654858692, 'loss': 0.024851730817098575, 'time_step': 0.005246965281934623, 'init_value': -3.7673227787017822, 'ave_value': -2.2817895854059773, 'soft_opc': nan} step=7802




2022-04-20 17:12.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.05 [info     ] FQE_20220420171119: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016716158533670815, 'time_algorithm_update': 0.00501908595303455, 'loss': 0.02530812010791781, 'time_step': 0.005264769117516208, 'init_value': -3.7803988456726074, 'ave_value': -2.2760071344170214, 'soft_opc': nan} step=7968




2022-04-20 17:12.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.06 [info     ] FQE_20220420171119: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001622582056436194, 'time_algorithm_update': 0.005036471837974456, 'loss': 0.025974761315969278, 'time_step': 0.005270018635025944, 'init_value': -3.8506107330322266, 'ave_value': -2.3048120465960253, 'soft_opc': nan} step=8134




2022-04-20 17:12.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:12.07 [info     ] FQE_20220420171119: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001631458121609975, 'time_algorithm_update': 0.005028326827359487, 'loss': 0.026924124432177877, 'time_step': 0.005266071802162263, 'init_value': -3.84428334236145, 'ave_value': -2.3106455185257637, 'soft_opc': nan} step=8300




2022-04-20 17:12.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171119/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:12.07 [info     ] Directory is created at d3rlpy_logs/FQE_20220420171207
2022-04-20 17:12.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:12.07 [debug    ] Building models...
2022-04-20 17:12.07 [debug    ] Models have been built.
2022-04-20 17:12.07 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420171207/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:12.09 [info     ] FQE_20220420171207: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016526704610780228, 'time_algorithm_update': 0.004557800154353297, 'loss': 0.03095277862861579, 'time_step': 0.004795734272446743, 'init_value': -0.8843938112258911, 'ave_value': -0.8720724218264894, 'soft_opc': nan} step=344




2022-04-20 17:12.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.11 [info     ] FQE_20220420171207: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017015947852023813, 'time_algorithm_update': 0.005003621411878009, 'loss': 0.026262016826157652, 'time_step': 0.005251883767371954, 'init_value': -1.6284785270690918, 'ave_value': -1.6180780936737318, 'soft_opc': nan} step=688




2022-04-20 17:12.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.13 [info     ] FQE_20220420171207: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016897778178370275, 'time_algorithm_update': 0.005044451979703681, 'loss': 0.0293496478955413, 'time_step': 0.005290174207022024, 'init_value': -2.549283981323242, 'ave_value': -2.5245971188351914, 'soft_opc': nan} step=1032




2022-04-20 17:12.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.15 [info     ] FQE_20220420171207: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017096968584282454, 'time_algorithm_update': 0.0050823653853216836, 'loss': 0.03342094364424431, 'time_step': 0.005328256723492644, 'init_value': -3.1751513481140137, 'ave_value': -3.119933725625008, 'soft_opc': nan} step=1376




2022-04-20 17:12.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.17 [info     ] FQE_20220420171207: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016754588415456373, 'time_algorithm_update': 0.005018812279368556, 'loss': 0.0431328187967369, 'time_step': 0.005259844452835793, 'init_value': -4.080414295196533, 'ave_value': -3.9742898758869987, 'soft_opc': nan} step=1720




2022-04-20 17:12.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.19 [info     ] FQE_20220420171207: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016881629478099735, 'time_algorithm_update': 0.004535618216492409, 'loss': 0.05398202338255942, 'time_step': 0.004780648059623186, 'init_value': -4.667917251586914, 'ave_value': -4.50265844566328, 'soft_opc': nan} step=2064




2022-04-20 17:12.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.20 [info     ] FQE_20220420171207: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.000168850255566974, 'time_algorithm_update': 0.005087221777716348, 'loss': 0.06545734958625724, 'time_step': 0.005331673594408257, 'init_value': -5.592968463897705, 'ave_value': -5.383798315181388, 'soft_opc': nan} step=2408




2022-04-20 17:12.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.22 [info     ] FQE_20220420171207: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001733407031658084, 'time_algorithm_update': 0.0050400682660036306, 'loss': 0.0816492373367425, 'time_step': 0.0052894845951435175, 'init_value': -6.025605201721191, 'ave_value': -5.720191383509485, 'soft_opc': nan} step=2752




2022-04-20 17:12.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.24 [info     ] FQE_20220420171207: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016857441081557164, 'time_algorithm_update': 0.0050379509149595745, 'loss': 0.09824663333778898, 'time_step': 0.005282410355501397, 'init_value': -6.542013168334961, 'ave_value': -6.1821463760626205, 'soft_opc': nan} step=3096




2022-04-20 17:12.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.26 [info     ] FQE_20220420171207: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016624913659206656, 'time_algorithm_update': 0.0045823094456694845, 'loss': 0.12235274261166883, 'time_step': 0.004824178856472636, 'init_value': -7.172115325927734, 'ave_value': -6.706326593010611, 'soft_opc': nan} step=3440




2022-04-20 17:12.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.28 [info     ] FQE_20220420171207: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001694067966106326, 'time_algorithm_update': 0.005073334588560947, 'loss': 0.139216780315998, 'time_step': 0.005319801180861717, 'init_value': -7.552821159362793, 'ave_value': -7.066638446280414, 'soft_opc': nan} step=3784




2022-04-20 17:12.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.30 [info     ] FQE_20220420171207: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016868114471435547, 'time_algorithm_update': 0.005081259927084279, 'loss': 0.1598704928467267, 'time_step': 0.0053241044975990475, 'init_value': -8.016493797302246, 'ave_value': -7.497175016931703, 'soft_opc': nan} step=4128




2022-04-20 17:12.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.32 [info     ] FQE_20220420171207: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016807054364404014, 'time_algorithm_update': 0.005085096109745114, 'loss': 0.1793269453743516, 'time_step': 0.0053274187930794645, 'init_value': -8.113521575927734, 'ave_value': -7.583198694996368, 'soft_opc': nan} step=4472




2022-04-20 17:12.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.34 [info     ] FQE_20220420171207: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001680144043855889, 'time_algorithm_update': 0.0050529068292573445, 'loss': 0.1988787115443238, 'time_step': 0.005296795867210211, 'init_value': -8.711997985839844, 'ave_value': -8.197922116246176, 'soft_opc': nan} step=4816




2022-04-20 17:12.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.36 [info     ] FQE_20220420171207: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001636133637539176, 'time_algorithm_update': 0.00454654804495878, 'loss': 0.22119567601746598, 'time_step': 0.004784125228260838, 'init_value': -8.806863784790039, 'ave_value': -8.350829005717962, 'soft_opc': nan} step=5160




2022-04-20 17:12.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.38 [info     ] FQE_20220420171207: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017010264618452206, 'time_algorithm_update': 0.005077637212221013, 'loss': 0.23914812307609895, 'time_step': 0.005322788343873135, 'init_value': -9.2429780960083, 'ave_value': -8.861730847012687, 'soft_opc': nan} step=5504




2022-04-20 17:12.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.40 [info     ] FQE_20220420171207: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001699335353319035, 'time_algorithm_update': 0.005057917084804801, 'loss': 0.24918347969651222, 'time_step': 0.005303883968397628, 'init_value': -9.208450317382812, 'ave_value': -8.853266258863313, 'soft_opc': nan} step=5848




2022-04-20 17:12.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.42 [info     ] FQE_20220420171207: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016979422680167265, 'time_algorithm_update': 0.005021597063818643, 'loss': 0.26683814372585785, 'time_step': 0.005267185527224874, 'init_value': -9.460336685180664, 'ave_value': -9.10173520206029, 'soft_opc': nan} step=6192




2022-04-20 17:12.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.44 [info     ] FQE_20220420171207: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016711825548216354, 'time_algorithm_update': 0.00459012181259865, 'loss': 0.2816244839517356, 'time_step': 0.004830768635106641, 'init_value': -9.888384819030762, 'ave_value': -9.616011030265541, 'soft_opc': nan} step=6536




2022-04-20 17:12.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.46 [info     ] FQE_20220420171207: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001707180987956912, 'time_algorithm_update': 0.005115131306093793, 'loss': 0.2940073841153969, 'time_step': 0.00536254741424738, 'init_value': -9.977169036865234, 'ave_value': -9.722234148168013, 'soft_opc': nan} step=6880




2022-04-20 17:12.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.48 [info     ] FQE_20220420171207: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016958907593128293, 'time_algorithm_update': 0.005021479933760887, 'loss': 0.30384694497360915, 'time_step': 0.005269336145977641, 'init_value': -10.383495330810547, 'ave_value': -10.20417965637092, 'soft_opc': nan} step=7224




2022-04-20 17:12.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.50 [info     ] FQE_20220420171207: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017212920410688534, 'time_algorithm_update': 0.005125087361003078, 'loss': 0.31006623589606996, 'time_step': 0.005373068327127501, 'init_value': -10.692941665649414, 'ave_value': -10.578585014378165, 'soft_opc': nan} step=7568




2022-04-20 17:12.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.52 [info     ] FQE_20220420171207: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017293386681135312, 'time_algorithm_update': 0.0051022144251091535, 'loss': 0.3138332382591745, 'time_step': 0.005351759666620299, 'init_value': -10.735976219177246, 'ave_value': -10.69102318904673, 'soft_opc': nan} step=7912




2022-04-20 17:12.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.53 [info     ] FQE_20220420171207: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001689396625341371, 'time_algorithm_update': 0.00455783064975295, 'loss': 0.32228091684018456, 'time_step': 0.004802140385605568, 'init_value': -11.345367431640625, 'ave_value': -11.425806712409951, 'soft_opc': nan} step=8256




2022-04-20 17:12.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.55 [info     ] FQE_20220420171207: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017123859982157863, 'time_algorithm_update': 0.0050779553346855695, 'loss': 0.3245445573594161, 'time_step': 0.0053242354892021, 'init_value': -11.478630065917969, 'ave_value': -11.665341053401363, 'soft_opc': nan} step=8600




2022-04-20 17:12.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.57 [info     ] FQE_20220420171207: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001733663470246071, 'time_algorithm_update': 0.005089264969493068, 'loss': 0.32692836907280737, 'time_step': 0.00533739494722943, 'init_value': -11.774881362915039, 'ave_value': -12.120210028329009, 'soft_opc': nan} step=8944




2022-04-20 17:12.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:12.59 [info     ] FQE_20220420171207: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001756139965944512, 'time_algorithm_update': 0.005122556242831918, 'loss': 0.3293054681975221, 'time_step': 0.005376832429752793, 'init_value': -11.701179504394531, 'ave_value': -12.148585438785387, 'soft_opc': nan} step=9288




2022-04-20 17:12.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.01 [info     ] FQE_20220420171207: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001709710719973542, 'time_algorithm_update': 0.004649370908737183, 'loss': 0.3272853987887068, 'time_step': 0.00489535650541616, 'init_value': -11.742208480834961, 'ave_value': -12.351737132582914, 'soft_opc': nan} step=9632




2022-04-20 17:13.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.03 [info     ] FQE_20220420171207: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017381754032401152, 'time_algorithm_update': 0.005076181056887605, 'loss': 0.33485359656841085, 'time_step': 0.005325596692950227, 'init_value': -11.82309341430664, 'ave_value': -12.455372581598642, 'soft_opc': nan} step=9976




2022-04-20 17:13.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.05 [info     ] FQE_20220420171207: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016975472139757732, 'time_algorithm_update': 0.005018842081690944, 'loss': 0.335223613339368, 'time_step': 0.005262084478555724, 'init_value': -12.075942993164062, 'ave_value': -12.858615264028042, 'soft_opc': nan} step=10320




2022-04-20 17:13.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.07 [info     ] FQE_20220420171207: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016834015070005904, 'time_algorithm_update': 0.005037680614826291, 'loss': 0.34205422807636476, 'time_step': 0.005281469156575757, 'init_value': -12.611371994018555, 'ave_value': -13.332041472829207, 'soft_opc': nan} step=10664




2022-04-20 17:13.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.09 [info     ] FQE_20220420171207: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017255683277928553, 'time_algorithm_update': 0.00510855053746423, 'loss': 0.34420006310155743, 'time_step': 0.0053574117117149885, 'init_value': -12.35983657836914, 'ave_value': -13.274417721984388, 'soft_opc': nan} step=11008




2022-04-20 17:13.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.11 [info     ] FQE_20220420171207: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016889045404833416, 'time_algorithm_update': 0.004712569159130717, 'loss': 0.34238150702291276, 'time_step': 0.004958178414854892, 'init_value': -12.583452224731445, 'ave_value': -13.688825740304415, 'soft_opc': nan} step=11352




2022-04-20 17:13.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.13 [info     ] FQE_20220420171207: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016939362814260084, 'time_algorithm_update': 0.005014091037040533, 'loss': 0.34884770304441104, 'time_step': 0.005259227614070094, 'init_value': -12.996986389160156, 'ave_value': -14.122439378292745, 'soft_opc': nan} step=11696




2022-04-20 17:13.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.15 [info     ] FQE_20220420171207: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017545666805533476, 'time_algorithm_update': 0.005065914503363676, 'loss': 0.34536385864322616, 'time_step': 0.0053190658258837325, 'init_value': -12.833751678466797, 'ave_value': -13.976512096253819, 'soft_opc': nan} step=12040




2022-04-20 17:13.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.17 [info     ] FQE_20220420171207: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017549617345943006, 'time_algorithm_update': 0.005089490219604137, 'loss': 0.34513881539135394, 'time_step': 0.005341972722563633, 'init_value': -13.11613655090332, 'ave_value': -14.349483361253766, 'soft_opc': nan} step=12384




2022-04-20 17:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.19 [info     ] FQE_20220420171207: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016728390094845793, 'time_algorithm_update': 0.004578794157782266, 'loss': 0.34489390375309215, 'time_step': 0.004823277162951093, 'init_value': -12.961541175842285, 'ave_value': -14.287185754626151, 'soft_opc': nan} step=12728




2022-04-20 17:13.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.21 [info     ] FQE_20220420171207: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001724272273307623, 'time_algorithm_update': 0.005084245010863903, 'loss': 0.344108379754606, 'time_step': 0.005335320566975793, 'init_value': -12.917872428894043, 'ave_value': -14.405734165719299, 'soft_opc': nan} step=13072




2022-04-20 17:13.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.23 [info     ] FQE_20220420171207: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017103830049204272, 'time_algorithm_update': 0.005059100167695866, 'loss': 0.3526231235943648, 'time_step': 0.005307000043780305, 'init_value': -13.17385196685791, 'ave_value': -14.721746530054507, 'soft_opc': nan} step=13416




2022-04-20 17:13.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.25 [info     ] FQE_20220420171207: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001745397268339645, 'time_algorithm_update': 0.00501885732939077, 'loss': 0.3549705466802427, 'time_step': 0.005271363396977269, 'init_value': -13.295827865600586, 'ave_value': -14.779666379782128, 'soft_opc': nan} step=13760




2022-04-20 17:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.27 [info     ] FQE_20220420171207: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017266772514165833, 'time_algorithm_update': 0.005027885353842447, 'loss': 0.3610768758794726, 'time_step': 0.005275101162666498, 'init_value': -13.058517456054688, 'ave_value': -14.790705399340236, 'soft_opc': nan} step=14104




2022-04-20 17:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.28 [info     ] FQE_20220420171207: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017131553139797476, 'time_algorithm_update': 0.004760000594826632, 'loss': 0.3664842443107519, 'time_step': 0.005005803219107694, 'init_value': -13.277913093566895, 'ave_value': -15.005766759687816, 'soft_opc': nan} step=14448




2022-04-20 17:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.30 [info     ] FQE_20220420171207: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017101889432862748, 'time_algorithm_update': 0.005025096410928771, 'loss': 0.37111506003312505, 'time_step': 0.005272511132927828, 'init_value': -13.458581924438477, 'ave_value': -15.151535332933287, 'soft_opc': nan} step=14792




2022-04-20 17:13.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.32 [info     ] FQE_20220420171207: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016933887503867926, 'time_algorithm_update': 0.005031802626543267, 'loss': 0.3752232469607491, 'time_step': 0.005278594272081242, 'init_value': -13.677003860473633, 'ave_value': -15.377455631531104, 'soft_opc': nan} step=15136




2022-04-20 17:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.34 [info     ] FQE_20220420171207: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001692064972810967, 'time_algorithm_update': 0.005067677691925404, 'loss': 0.3846744370273163, 'time_step': 0.005311837030011554, 'init_value': -13.702220916748047, 'ave_value': -15.24397566530996, 'soft_opc': nan} step=15480




2022-04-20 17:13.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.36 [info     ] FQE_20220420171207: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016879896784937658, 'time_algorithm_update': 0.004618156094883763, 'loss': 0.3879541557222648, 'time_step': 0.004860083031099896, 'init_value': -13.333617210388184, 'ave_value': -14.915581405323906, 'soft_opc': nan} step=15824




2022-04-20 17:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.38 [info     ] FQE_20220420171207: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017325961312582327, 'time_algorithm_update': 0.005120443743328715, 'loss': 0.3814750257825349, 'time_step': 0.005371000877646513, 'init_value': -13.322378158569336, 'ave_value': -14.802116044428669, 'soft_opc': nan} step=16168




2022-04-20 17:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.40 [info     ] FQE_20220420171207: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017472963000452794, 'time_algorithm_update': 0.0050868877144747, 'loss': 0.39643670416837773, 'time_step': 0.005340036957762962, 'init_value': -13.396990776062012, 'ave_value': -15.054686896472347, 'soft_opc': nan} step=16512




2022-04-20 17:13.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.42 [info     ] FQE_20220420171207: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017084146654883095, 'time_algorithm_update': 0.005123833584231, 'loss': 0.3970492627163049, 'time_step': 0.00536954402923584, 'init_value': -13.39665412902832, 'ave_value': -14.861314340226084, 'soft_opc': nan} step=16856




2022-04-20 17:13.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:13.44 [info     ] FQE_20220420171207: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001742659613143566, 'time_algorithm_update': 0.004915380200674367, 'loss': 0.40266392848638516, 'time_step': 0.005167331113371738, 'init_value': -13.501907348632812, 'ave_value': -14.980415434687323, 'soft_opc': nan} step=17200




2022-04-20 17:13.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171207/model_17200.pt
search iteration:  21
using hyper params:  [0.006682698923772645, 0.004330308648086085, 8.207744578973026e-05, 5]
2022-04-20 17:13.44 [debug    ] RoundIterator is selected.
2022-04-20 17:13.44 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420171344
2022-04-20 17:13.44 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:13.44 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:13.44 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:13.44 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00668269892377

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.48 [info     ] TD3PlusBC_20220420171344: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00037795270395557783, 'time_algorithm_update': 0.008847883570263957, 'critic_loss': 7.187870238956652, 'actor_loss': 2.6480652337882953, 'time_step': 0.00930741516470212, 'td_error': 0.9196755862636716, 'init_value': -8.01667308807373, 'ave_value': -4.9689103972454145} step=342
2022-04-20 17:13.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.52 [info     ] TD3PlusBC_20220420171344: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003739846380133378, 'time_algorithm_update': 0.008746727865341811, 'critic_loss': 3.2631203725672604, 'actor_loss': 2.545970965547171, 'time_step': 0.00919673526496218, 'td_error': 1.0433153846819245, 'init_value': -11.317686080932617, 'ave_value': -7.036368516556466} step=684
2022-04-20 17:13.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.55 [info     ] TD3PlusBC_20220420171344: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003717384840312757, 'time_algorithm_update': 0.008317499132881387, 'critic_loss': 4.952152963967351, 'actor_loss': 2.534691039581745, 'time_step': 0.008767045729341562, 'td_error': 1.284435368898431, 'init_value': -15.145156860351562, 'ave_value': -9.482540071428678} step=1026
2022-04-20 17:13.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.59 [info     ] TD3PlusBC_20220420171344: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003756528709367005, 'time_algorithm_update': 0.008907102701956765, 'critic_loss': 6.951441835590273, 'actor_loss': 2.5291238068140043, 'time_step': 0.00936185267933628, 'td_error': 1.5126768189197157, 'init_value': -18.350915908813477, 'ave_value': -11.582745156591036} step=1368
2022-04-20 17:13.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.03 [info     ] TD3PlusBC_20220420171344: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00037289084049693324, 'time_algorithm_update': 0.00891651535591884, 'critic_loss': 9.242569971502872, 'actor_loss': 2.5277869910524604, 'time_step': 0.009368971077322263, 'td_error': 1.8686585163501839, 'init_value': -22.333173751831055, 'ave_value': -14.145850721048317} step=1710
2022-04-20 17:14.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.06 [info     ] TD3PlusBC_20220420171344: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00037030378977457684, 'time_algorithm_update': 0.008764682457460994, 'critic_loss': 12.161608777548137, 'actor_loss': 2.5248109527498657, 'time_step': 0.009212501564918206, 'td_error': 2.1891799393877105, 'init_value': -25.401042938232422, 'ave_value': -16.139068447087244} step=2052
2022-04-20 17:14.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.10 [info     ] TD3PlusBC_20220420171344: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003776041387814527, 'time_algorithm_update': 0.008838102831478007, 'critic_loss': 15.280236509111193, 'actor_loss': 2.525823664246944, 'time_step': 0.009287068021227743, 'td_error': 2.6290722375867275, 'init_value': -28.820110321044922, 'ave_value': -18.495605546568797} step=2394
2022-04-20 17:14.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.14 [info     ] TD3PlusBC_20220420171344: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003754437318322254, 'time_algorithm_update': 0.008307042177657636, 'critic_loss': 18.593012399840774, 'actor_loss': 2.5246039189790426, 'time_step': 0.008754535725242213, 'td_error': 2.9671415528404785, 'init_value': -32.23011016845703, 'ave_value': -20.513853503415984} step=2736
2022-04-20 17:14.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.17 [info     ] TD3PlusBC_20220420171344: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003743764252690544, 'time_algorithm_update': 0.00891080585836667, 'critic_loss': 21.92718211391516, 'actor_loss': 2.5231529760081868, 'time_step': 0.009358525276184082, 'td_error': 3.37102852425867, 'init_value': -35.22698211669922, 'ave_value': -22.54520058698751} step=3078
2022-04-20 17:14.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.21 [info     ] TD3PlusBC_20220420171344: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00037275141442728323, 'time_algorithm_update': 0.008857199323107625, 'critic_loss': 25.51927805783456, 'actor_loss': 2.523166823805424, 'time_step': 0.009301536961605674, 'td_error': 3.686534056721886, 'init_value': -38.20302200317383, 'ave_value': -24.5388074168531} step=3420
2022-04-20 17:14.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.25 [info     ] TD3PlusBC_20220420171344: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00037121842479148105, 'time_algorithm_update': 0.008657335537916038, 'critic_loss': 29.34367791393347, 'actor_loss': 2.521151747619897, 'time_step': 0.009103033277723525, 'td_error': 4.063646172060848, 'init_value': -41.30558395385742, 'ave_value': -26.39779468743659} step=3762
2022-04-20 17:14.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.28 [info     ] TD3PlusBC_20220420171344: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003722690002262941, 'time_algorithm_update': 0.008765415838587354, 'critic_loss': 33.50548327596564, 'actor_loss': 2.522139798828036, 'time_step': 0.00921205331010428, 'td_error': 4.4928101761737205, 'init_value': -44.5201530456543, 'ave_value': -28.388081239679543} step=4104
2022-04-20 17:14.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.32 [info     ] TD3PlusBC_20220420171344: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00037635557832773665, 'time_algorithm_update': 0.00847810193112022, 'critic_loss': 37.58240749403747, 'actor_loss': 2.521994274262099, 'time_step': 0.008929352314151519, 'td_error': 4.839213520930222, 'init_value': -46.90662384033203, 'ave_value': -29.965475486279928} step=4446
2022-04-20 17:14.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.36 [info     ] TD3PlusBC_20220420171344: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003764866388332077, 'time_algorithm_update': 0.008937657227990224, 'critic_loss': 41.74356059024208, 'actor_loss': 2.520917352877165, 'time_step': 0.009391088931881196, 'td_error': 5.103534396988478, 'init_value': -49.0251579284668, 'ave_value': -31.482931698616007} step=4788
2022-04-20 17:14.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.39 [info     ] TD3PlusBC_20220420171344: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00037198945095664576, 'time_algorithm_update': 0.008873454311437774, 'critic_loss': 46.184411138121845, 'actor_loss': 2.5193356282529775, 'time_step': 0.00931687313213683, 'td_error': 5.555120276972393, 'init_value': -51.499900817871094, 'ave_value': -33.26206543567045} step=5130
2022-04-20 17:14.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.43 [info     ] TD3PlusBC_20220420171344: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00037294451953374854, 'time_algorithm_update': 0.008532007535298666, 'critic_loss': 50.42970261378595, 'actor_loss': 2.5210658778921204, 'time_step': 0.00897992075535289, 'td_error': 5.985109693972676, 'init_value': -53.81779861450195, 'ave_value': -34.873555869734304} step=5472
2022-04-20 17:14.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.46 [info     ] TD3PlusBC_20220420171344: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00037157535552978516, 'time_algorithm_update': 0.008693151306687739, 'critic_loss': 55.010570643240946, 'actor_loss': 2.5208914740043773, 'time_step': 0.009139382351211638, 'td_error': 6.316562363853209, 'init_value': -56.032554626464844, 'ave_value': -36.12552481327055} step=5814
2022-04-20 17:14.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.50 [info     ] TD3PlusBC_20220420171344: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003725199671516642, 'time_algorithm_update': 0.008413435422886185, 'critic_loss': 59.17038961042438, 'actor_loss': 2.51893028738903, 'time_step': 0.008859147802430984, 'td_error': 6.73707440707331, 'init_value': -58.6148796081543, 'ave_value': -37.71774983803029} step=6156
2022-04-20 17:14.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.54 [info     ] TD3PlusBC_20220420171344: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003828611987376074, 'time_algorithm_update': 0.008954109504208928, 'critic_loss': 63.81554330859268, 'actor_loss': 2.5189293323204534, 'time_step': 0.009412176427785416, 'td_error': 7.165600503851304, 'init_value': -59.946807861328125, 'ave_value': -38.982417431784505} step=6498
2022-04-20 17:14.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.58 [info     ] TD3PlusBC_20220420171344: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00037168341073376393, 'time_algorithm_update': 0.008840568581519768, 'critic_loss': 68.25670351619608, 'actor_loss': 2.520727817078083, 'time_step': 0.009287137734262567, 'td_error': 7.4778910242711865, 'init_value': -62.30625534057617, 'ave_value': -40.43567682962277} step=6840
2022-04-20 17:14.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.01 [info     ] TD3PlusBC_20220420171344: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00037351128650687593, 'time_algorithm_update': 0.008444134952032079, 'critic_loss': 72.48150346990218, 'actor_loss': 2.5215488860481665, 'time_step': 0.00889309735326042, 'td_error': 7.788946101491515, 'init_value': -63.722496032714844, 'ave_value': -41.77608622850028} step=7182
2022-04-20 17:15.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.05 [info     ] TD3PlusBC_20220420171344: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037199572512978, 'time_algorithm_update': 0.008889684203075386, 'critic_loss': 77.0778143029464, 'actor_loss': 2.5205998406772725, 'time_step': 0.009336117415400276, 'td_error': 8.011542826805552, 'init_value': -64.85462951660156, 'ave_value': -42.62288359064005} step=7524
2022-04-20 17:15.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.08 [info     ] TD3PlusBC_20220420171344: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00037179634585018045, 'time_algorithm_update': 0.008343032229016398, 'critic_loss': 81.15468215942383, 'actor_loss': 2.521825547803912, 'time_step': 0.00879045327504476, 'td_error': 8.449352939943953, 'init_value': -66.16219329833984, 'ave_value': -43.82167092101598} step=7866
2022-04-20 17:15.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.12 [info     ] TD3PlusBC_20220420171344: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00037802241699040287, 'time_algorithm_update': 0.008881358375326234, 'critic_loss': 85.56364052755791, 'actor_loss': 2.5216283156857853, 'time_step': 0.009336125083834107, 'td_error': 8.558652923880844, 'init_value': -67.7271499633789, 'ave_value': -44.861736426915606} step=8208
2022-04-20 17:15.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.16 [info     ] TD3PlusBC_20220420171344: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00037282670450489424, 'time_algorithm_update': 0.008771632149902701, 'critic_loss': 89.31677861799274, 'actor_loss': 2.522664789567914, 'time_step': 0.009220108651278312, 'td_error': 9.031173318397824, 'init_value': -68.51768493652344, 'ave_value': -45.87280486848004} step=8550
2022-04-20 17:15.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.19 [info     ] TD3PlusBC_20220420171344: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003694651419656319, 'time_algorithm_update': 0.008419450263530887, 'critic_loss': 93.57022264268663, 'actor_loss': 2.524211749695895, 'time_step': 0.008864666286267732, 'td_error': 9.546557847350563, 'init_value': -70.82215881347656, 'ave_value': -47.24881752001214} step=8892
2022-04-20 17:15.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.23 [info     ] TD3PlusBC_20220420171344: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003752806033307349, 'time_algorithm_update': 0.008910800281323884, 'critic_loss': 97.59282746789052, 'actor_loss': 2.5244639909755415, 'time_step': 0.009362229826854683, 'td_error': 9.724673022769757, 'init_value': -71.42265319824219, 'ave_value': -48.07355984655847} step=9234
2022-04-20 17:15.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.26 [info     ] TD3PlusBC_20220420171344: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003748818447715358, 'time_algorithm_update': 0.007900106976603904, 'critic_loss': 101.8102013771994, 'actor_loss': 2.5241963849430196, 'time_step': 0.008342590945505957, 'td_error': 9.534277322795537, 'init_value': -71.79841613769531, 'ave_value': -48.62128645371394} step=9576
2022-04-20 17:15.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.29 [info     ] TD3PlusBC_20220420171344: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003695076669168751, 'time_algorithm_update': 0.006659168946115594, 'critic_loss': 105.51286079451354, 'actor_loss': 2.5240792991125094, 'time_step': 0.007088341908148158, 'td_error': 10.533272288177043, 'init_value': -74.47898864746094, 'ave_value': -50.087691169080095} step=9918
2022-04-20 17:15.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.32 [info     ] TD3PlusBC_20220420171344: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00037005282284920674, 'time_algorithm_update': 0.006663323842991165, 'critic_loss': 109.16209194116425, 'actor_loss': 2.5239290410315083, 'time_step': 0.007091578684355083, 'td_error': 10.167864560487873, 'init_value': -73.84465789794922, 'ave_value': -50.531445361623824} step=10260
2022-04-20 17:15.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.35 [info     ] TD3PlusBC_20220420171344: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037003609172084876, 'time_algorithm_update': 0.00674893842105977, 'critic_loss': 112.58302235742758, 'actor_loss': 2.524819276486224, 'time_step': 0.007178159485086363, 'td_error': 10.780269433251831, 'init_value': -74.9029769897461, 'ave_value': -51.60067362363392} step=10602
2022-04-20 17:15.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.38 [info     ] TD3PlusBC_20220420171344: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003686543793706169, 'time_algorithm_update': 0.006689627268160993, 'critic_loss': 116.24650236877085, 'actor_loss': 2.5250871237258465, 'time_step': 0.007118301781994557, 'td_error': 11.124274619185384, 'init_value': -76.91829681396484, 'ave_value': -52.5613371614788} step=10944
2022-04-20 17:15.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.41 [info     ] TD3PlusBC_20220420171344: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003702382595218413, 'time_algorithm_update': 0.0067386480799892495, 'critic_loss': 119.49710089700264, 'actor_loss': 2.524960740965012, 'time_step': 0.007173573064525225, 'td_error': 11.289868930289098, 'init_value': -76.36100006103516, 'ave_value': -53.12678630024681} step=11286
2022-04-20 17:15.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.43 [info     ] TD3PlusBC_20220420171344: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00036715624625222726, 'time_algorithm_update': 0.006642299088818288, 'critic_loss': 122.62433358800341, 'actor_loss': 2.5249028289527224, 'time_step': 0.007070049207810073, 'td_error': 11.184985358056682, 'init_value': -76.72992706298828, 'ave_value': -53.58634291552972} step=11628
2022-04-20 17:15.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.46 [info     ] TD3PlusBC_20220420171344: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00036854841555768284, 'time_algorithm_update': 0.006713417538425378, 'critic_loss': 125.45448178296898, 'actor_loss': 2.5260460822902924, 'time_step': 0.0071420293105276, 'td_error': 12.109329649937774, 'init_value': -77.26068115234375, 'ave_value': -54.38613009859002} step=11970
2022-04-20 17:15.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.49 [info     ] TD3PlusBC_20220420171344: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003691967467815555, 'time_algorithm_update': 0.006735477531165408, 'critic_loss': 128.37154149730304, 'actor_loss': 2.5256664934214097, 'time_step': 0.007169056356998912, 'td_error': 12.323256131789782, 'init_value': -78.44047546386719, 'ave_value': -55.02218776167832} step=12312
2022-04-20 17:15.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.52 [info     ] TD3PlusBC_20220420171344: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003730400263914588, 'time_algorithm_update': 0.0067693615517421075, 'critic_loss': 131.0473791423597, 'actor_loss': 2.5278750190957946, 'time_step': 0.007206046790407415, 'td_error': 11.65081891569356, 'init_value': -76.8033218383789, 'ave_value': -55.15194165614476} step=12654
2022-04-20 17:15.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.55 [info     ] TD3PlusBC_20220420171344: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003748616279914365, 'time_algorithm_update': 0.00668499135134513, 'critic_loss': 133.5632074590315, 'actor_loss': 2.526749136852242, 'time_step': 0.007122877048470123, 'td_error': 11.989309017573012, 'init_value': -78.06439971923828, 'ave_value': -55.845348942202456} step=12996
2022-04-20 17:15.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.58 [info     ] TD3PlusBC_20220420171344: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003695118496989646, 'time_algorithm_update': 0.006699411492598684, 'critic_loss': 135.68996447289896, 'actor_loss': 2.527173438267401, 'time_step': 0.007130420695968539, 'td_error': 12.485460076824802, 'init_value': -79.11552429199219, 'ave_value': -56.58534742159769} step=13338
2022-04-20 17:15.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.01 [info     ] TD3PlusBC_20220420171344: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037384521194368773, 'time_algorithm_update': 0.006762286375837716, 'critic_loss': 138.26493578905252, 'actor_loss': 2.5273118925373455, 'time_step': 0.00719964643668013, 'td_error': 12.577955766861892, 'init_value': -79.52684020996094, 'ave_value': -57.08370630422439} step=13680
2022-04-20 17:16.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.03 [info     ] TD3PlusBC_20220420171344: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003682130958601745, 'time_algorithm_update': 0.006747427739595112, 'critic_loss': 140.25322257147894, 'actor_loss': 2.526563234496535, 'time_step': 0.007179970629731117, 'td_error': 12.603262242330356, 'init_value': -78.00065612792969, 'ave_value': -57.20194082265778} step=14022
2022-04-20 17:16.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.06 [info     ] TD3PlusBC_20220420171344: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003689311401188722, 'time_algorithm_update': 0.006741546747977273, 'critic_loss': 142.35696391055458, 'actor_loss': 2.5281553379973474, 'time_step': 0.007171187484473513, 'td_error': 13.286460386331147, 'init_value': -78.09999084472656, 'ave_value': -57.88354289586664} step=14364
2022-04-20 17:16.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.09 [info     ] TD3PlusBC_20220420171344: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003692085979974758, 'time_algorithm_update': 0.006274555161682486, 'critic_loss': 144.60027511775144, 'actor_loss': 2.5282893013535883, 'time_step': 0.006709175500256276, 'td_error': 13.163251394737946, 'init_value': -77.28334045410156, 'ave_value': -58.02673005714123} step=14706
2022-04-20 17:16.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.12 [info     ] TD3PlusBC_20220420171344: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003736681408352322, 'time_algorithm_update': 0.006787741393373723, 'critic_loss': 146.63619267870808, 'actor_loss': 2.5280246804332176, 'time_step': 0.00722664629506786, 'td_error': 13.032244135702332, 'init_value': -79.41739654541016, 'ave_value': -58.99559081410016} step=15048
2022-04-20 17:16.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.15 [info     ] TD3PlusBC_20220420171344: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003744335899576109, 'time_algorithm_update': 0.006746504041883681, 'critic_loss': 148.35572777017515, 'actor_loss': 2.5284156896914656, 'time_step': 0.0071818194194146765, 'td_error': 13.478467676625128, 'init_value': -79.04188537597656, 'ave_value': -59.080959054170734} step=15390
2022-04-20 17:16.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.18 [info     ] TD3PlusBC_20220420171344: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00036634269513581926, 'time_algorithm_update': 0.006675278931333308, 'critic_loss': 150.00406149256298, 'actor_loss': 2.5305086506737604, 'time_step': 0.007103672501636528, 'td_error': 13.369155656007692, 'init_value': -79.46394348144531, 'ave_value': -59.640976777408305} step=15732
2022-04-20 17:16.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.20 [info     ] TD3PlusBC_20220420171344: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003684926451298228, 'time_algorithm_update': 0.00678341639669318, 'critic_loss': 151.6418330990083, 'actor_loss': 2.528937601903726, 'time_step': 0.0072123572143197754, 'td_error': 13.728831619268409, 'init_value': -78.93043518066406, 'ave_value': -59.68946984861172} step=16074
2022-04-20 17:16.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.23 [info     ] TD3PlusBC_20220420171344: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003685574782522101, 'time_algorithm_update': 0.00675290648700201, 'critic_loss': 153.42430478369283, 'actor_loss': 2.5298617402015373, 'time_step': 0.0071782445349888495, 'td_error': 13.782197303640181, 'init_value': -78.79083251953125, 'ave_value': -60.14972668854352} step=16416
2022-04-20 17:16.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.26 [info     ] TD3PlusBC_20220420171344: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00036869760145220843, 'time_algorithm_update': 0.006741633192140457, 'critic_loss': 155.19417393556114, 'actor_loss': 2.530770470524392, 'time_step': 0.007175153459024708, 'td_error': 13.83267442112368, 'init_value': -80.23331451416016, 'ave_value': -61.00775146386964} step=16758
2022-04-20 17:16.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:16.29 [info     ] TD3PlusBC_20220420171344: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037066420616462214, 'time_algorithm_update': 0.0067095979612473156, 'critic_loss': 156.3964374386079, 'actor_loss': 2.5314636899713885, 'time_step': 0.007143523957994249, 'td_error': 14.323361603597608, 'init_value': -76.81297302246094, 'ave_value': -60.691475718517765} step=17100
2022-04-20 17:16.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171344/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-0

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:16.30 [info     ] FQE_20220420171629: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015525501894663615, 'time_algorithm_update': 0.003482649125248553, 'loss': 0.008257564833297967, 'time_step': 0.0037054828850619764, 'init_value': -0.4296990931034088, 'ave_value': -0.36070458874911876, 'soft_opc': nan} step=166




2022-04-20 17:16.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.31 [info     ] FQE_20220420171629: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015256921929049204, 'time_algorithm_update': 0.0035121397799756154, 'loss': 0.005710371683003852, 'time_step': 0.003733851823462061, 'init_value': -0.5668794512748718, 'ave_value': -0.43813544434604346, 'soft_opc': nan} step=332




2022-04-20 17:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.31 [info     ] FQE_20220420171629: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001579077847032662, 'time_algorithm_update': 0.003486896135720862, 'loss': 0.005273984210756439, 'time_step': 0.0037172745509319997, 'init_value': -0.5938154458999634, 'ave_value': -0.44083233682254147, 'soft_opc': nan} step=498




2022-04-20 17:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.32 [info     ] FQE_20220420171629: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001621231975325619, 'time_algorithm_update': 0.0035453833729387767, 'loss': 0.005360527018769588, 'time_step': 0.0037818802408425205, 'init_value': -0.6551856398582458, 'ave_value': -0.48201722153001003, 'soft_opc': nan} step=664




2022-04-20 17:16.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.33 [info     ] FQE_20220420171629: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001578848045992564, 'time_algorithm_update': 0.003599963992475027, 'loss': 0.005131358883893454, 'time_step': 0.0038299991423825183, 'init_value': -0.7062835693359375, 'ave_value': -0.5116113865932634, 'soft_opc': nan} step=830




2022-04-20 17:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.33 [info     ] FQE_20220420171629: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001553340130541698, 'time_algorithm_update': 0.0034923180040106715, 'loss': 0.004935615861519094, 'time_step': 0.0037200278546436726, 'init_value': -0.7234609127044678, 'ave_value': -0.5101329472041815, 'soft_opc': nan} step=996




2022-04-20 17:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.34 [info     ] FQE_20220420171629: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015919036175831254, 'time_algorithm_update': 0.003539002085306558, 'loss': 0.005036409024194062, 'time_step': 0.003773324460868376, 'init_value': -0.7929670810699463, 'ave_value': -0.5713626077616806, 'soft_opc': nan} step=1162




2022-04-20 17:16.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.35 [info     ] FQE_20220420171629: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015627188854906932, 'time_algorithm_update': 0.003455427755792457, 'loss': 0.004618631120412105, 'time_step': 0.0036820776491280063, 'init_value': -0.8167165517807007, 'ave_value': -0.5888926331658622, 'soft_opc': nan} step=1328




2022-04-20 17:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.35 [info     ] FQE_20220420171629: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015599325478795063, 'time_algorithm_update': 0.0034303076295967563, 'loss': 0.004422726655934365, 'time_step': 0.003656789480921734, 'init_value': -0.8585201501846313, 'ave_value': -0.6213375136826758, 'soft_opc': nan} step=1494




2022-04-20 17:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.36 [info     ] FQE_20220420171629: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015493760626000095, 'time_algorithm_update': 0.003571326474109328, 'loss': 0.004550162576565362, 'time_step': 0.003798384264291051, 'init_value': -0.9185508489608765, 'ave_value': -0.6651701834052801, 'soft_opc': nan} step=1660




2022-04-20 17:16.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.37 [info     ] FQE_20220420171629: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015926361083984375, 'time_algorithm_update': 0.0035004472158041344, 'loss': 0.0043853531768321095, 'time_step': 0.0037349534321980305, 'init_value': -0.9634197354316711, 'ave_value': -0.702258571646772, 'soft_opc': nan} step=1826




2022-04-20 17:16.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.37 [info     ] FQE_20220420171629: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015571892979633376, 'time_algorithm_update': 0.003410195729818689, 'loss': 0.004431191662113261, 'time_step': 0.003633533615663827, 'init_value': -1.0083236694335938, 'ave_value': -0.7272753899665297, 'soft_opc': nan} step=1992




2022-04-20 17:16.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.38 [info     ] FQE_20220420171629: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015663095267422228, 'time_algorithm_update': 0.003509502813040492, 'loss': 0.005227636848856886, 'time_step': 0.003733870494796569, 'init_value': -1.032914638519287, 'ave_value': -0.7331817250875001, 'soft_opc': nan} step=2158




2022-04-20 17:16.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.39 [info     ] FQE_20220420171629: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015824674123741058, 'time_algorithm_update': 0.003502753843744117, 'loss': 0.005419092663109347, 'time_step': 0.003731128681136901, 'init_value': -1.122620701789856, 'ave_value': -0.8041322520711702, 'soft_opc': nan} step=2324




2022-04-20 17:16.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.40 [info     ] FQE_20220420171629: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001545440719788333, 'time_algorithm_update': 0.0034864796213356845, 'loss': 0.0058594636923453985, 'time_step': 0.0037151761801846057, 'init_value': -1.118358850479126, 'ave_value': -0.7918296597692445, 'soft_opc': nan} step=2490




2022-04-20 17:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.40 [info     ] FQE_20220420171629: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001574725989835808, 'time_algorithm_update': 0.003435445119099445, 'loss': 0.006724125989684723, 'time_step': 0.0036650838622127673, 'init_value': -1.2338593006134033, 'ave_value': -0.8891059925318234, 'soft_opc': nan} step=2656




2022-04-20 17:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.41 [info     ] FQE_20220420171629: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015859431531055863, 'time_algorithm_update': 0.003578648509749447, 'loss': 0.007402964237185352, 'time_step': 0.003807605031024979, 'init_value': -1.3671104907989502, 'ave_value': -1.0282026013799919, 'soft_opc': nan} step=2822




2022-04-20 17:16.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.42 [info     ] FQE_20220420171629: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001606898135449513, 'time_algorithm_update': 0.0034287148211375774, 'loss': 0.008203157035813454, 'time_step': 0.0036609316446694984, 'init_value': -1.356640338897705, 'ave_value': -0.9994022238689164, 'soft_opc': nan} step=2988




2022-04-20 17:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.42 [info     ] FQE_20220420171629: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015573760113084172, 'time_algorithm_update': 0.003422886492258095, 'loss': 0.009176642514754042, 'time_step': 0.003650501549962055, 'init_value': -1.4014256000518799, 'ave_value': -1.0431329199234611, 'soft_opc': nan} step=3154




2022-04-20 17:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.43 [info     ] FQE_20220420171629: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001549318612339985, 'time_algorithm_update': 0.003456193280507283, 'loss': 0.010233456024855196, 'time_step': 0.0036813609571342007, 'init_value': -1.4809741973876953, 'ave_value': -1.1346300657182042, 'soft_opc': nan} step=3320




2022-04-20 17:16.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.44 [info     ] FQE_20220420171629: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016092679586755224, 'time_algorithm_update': 0.0034774527492293394, 'loss': 0.010687963140803019, 'time_step': 0.003710369029677058, 'init_value': -1.5377737283706665, 'ave_value': -1.1687484602674607, 'soft_opc': nan} step=3486




2022-04-20 17:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.44 [info     ] FQE_20220420171629: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001568751162793263, 'time_algorithm_update': 0.003537323101457343, 'loss': 0.011532664712075234, 'time_step': 0.0037638092615518226, 'init_value': -1.5742475986480713, 'ave_value': -1.188826590596891, 'soft_opc': nan} step=3652




2022-04-20 17:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.45 [info     ] FQE_20220420171629: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001558309578033815, 'time_algorithm_update': 0.003512770296579384, 'loss': 0.01202363473172468, 'time_step': 0.0037395666880779958, 'init_value': -1.5781006813049316, 'ave_value': -1.182060508715274, 'soft_opc': nan} step=3818




2022-04-20 17:16.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.46 [info     ] FQE_20220420171629: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015477961804493363, 'time_algorithm_update': 0.003391387950943177, 'loss': 0.013900927129111826, 'time_step': 0.0036138525928359434, 'init_value': -1.6357474327087402, 'ave_value': -1.2349551009140103, 'soft_opc': nan} step=3984




2022-04-20 17:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.46 [info     ] FQE_20220420171629: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015581228646887354, 'time_algorithm_update': 0.0034421423831617975, 'loss': 0.014918289396979183, 'time_step': 0.0036743276090507046, 'init_value': -1.6398874521255493, 'ave_value': -1.200647432190106, 'soft_opc': nan} step=4150




2022-04-20 17:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.47 [info     ] FQE_20220420171629: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015845643468649992, 'time_algorithm_update': 0.0034542227365884437, 'loss': 0.01575269450711266, 'time_step': 0.0036840367029948406, 'init_value': -1.6846957206726074, 'ave_value': -1.2381110501265875, 'soft_opc': nan} step=4316




2022-04-20 17:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.48 [info     ] FQE_20220420171629: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001547379666064159, 'time_algorithm_update': 0.0035509632294436536, 'loss': 0.016595777676788617, 'time_step': 0.0037815312305128717, 'init_value': -1.7112176418304443, 'ave_value': -1.2376769446587474, 'soft_opc': nan} step=4482




2022-04-20 17:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.48 [info     ] FQE_20220420171629: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015765069478965667, 'time_algorithm_update': 0.0034730104078729467, 'loss': 0.01819134698493451, 'time_step': 0.0037023561546601445, 'init_value': -1.8930463790893555, 'ave_value': -1.4023732413730776, 'soft_opc': nan} step=4648




2022-04-20 17:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.49 [info     ] FQE_20220420171629: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.000157834535621735, 'time_algorithm_update': 0.0035567125642156027, 'loss': 0.019040661796658153, 'time_step': 0.0037927886089646674, 'init_value': -1.9442046880722046, 'ave_value': -1.4328130329640263, 'soft_opc': nan} step=4814




2022-04-20 17:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.50 [info     ] FQE_20220420171629: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016026324536426957, 'time_algorithm_update': 0.003433279244296522, 'loss': 0.020060255953828883, 'time_step': 0.0036672526095286907, 'init_value': -1.9770878553390503, 'ave_value': -1.4163591410123602, 'soft_opc': nan} step=4980




2022-04-20 17:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.50 [info     ] FQE_20220420171629: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015867187316159168, 'time_algorithm_update': 0.003488964345081743, 'loss': 0.02142152604604903, 'time_step': 0.003715574023235275, 'init_value': -2.1227033138275146, 'ave_value': -1.5425069794302118, 'soft_opc': nan} step=5146




2022-04-20 17:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.51 [info     ] FQE_20220420171629: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015623598213655403, 'time_algorithm_update': 0.0036073291158101647, 'loss': 0.022429710395735162, 'time_step': 0.00383451042405094, 'init_value': -2.165900230407715, 'ave_value': -1.580866104984438, 'soft_opc': nan} step=5312




2022-04-20 17:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.52 [info     ] FQE_20220420171629: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015836882303996258, 'time_algorithm_update': 0.0035195982599832924, 'loss': 0.02439032707094069, 'time_step': 0.003750460693635136, 'init_value': -2.2354846000671387, 'ave_value': -1.629913980422986, 'soft_opc': nan} step=5478




2022-04-20 17:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.53 [info     ] FQE_20220420171629: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015575196369584785, 'time_algorithm_update': 0.0034761155944272697, 'loss': 0.025472266483821917, 'time_step': 0.0037055202277309924, 'init_value': -2.3784866333007812, 'ave_value': -1.7429844070037173, 'soft_opc': nan} step=5644




2022-04-20 17:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.53 [info     ] FQE_20220420171629: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015826828508491977, 'time_algorithm_update': 0.003556370735168457, 'loss': 0.028406081297603178, 'time_step': 0.003782177545938147, 'init_value': -2.4922118186950684, 'ave_value': -1.8291300675185682, 'soft_opc': nan} step=5810




2022-04-20 17:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.54 [info     ] FQE_20220420171629: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001573131745120129, 'time_algorithm_update': 0.0035466056272207974, 'loss': 0.027935379535431064, 'time_step': 0.0037743499480098128, 'init_value': -2.5730435848236084, 'ave_value': -1.8686965022210937, 'soft_opc': nan} step=5976




2022-04-20 17:16.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.55 [info     ] FQE_20220420171629: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015565286199730564, 'time_algorithm_update': 0.0035351615354239224, 'loss': 0.029380266040602857, 'time_step': 0.003759892590074654, 'init_value': -2.6851580142974854, 'ave_value': -1.9383847107397612, 'soft_opc': nan} step=6142




2022-04-20 17:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.55 [info     ] FQE_20220420171629: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015702448695538993, 'time_algorithm_update': 0.003530475030462426, 'loss': 0.03051293301129579, 'time_step': 0.0037557145199143744, 'init_value': -2.7511892318725586, 'ave_value': -1.9896848764115325, 'soft_opc': nan} step=6308




2022-04-20 17:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.56 [info     ] FQE_20220420171629: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015808300799634084, 'time_algorithm_update': 0.0036119438079466304, 'loss': 0.03265982203140675, 'time_step': 0.003836963550153985, 'init_value': -2.775538444519043, 'ave_value': -1.9834892547211131, 'soft_opc': nan} step=6474




2022-04-20 17:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.57 [info     ] FQE_20220420171629: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001592593020703419, 'time_algorithm_update': 0.0035929909671645567, 'loss': 0.03405122914848041, 'time_step': 0.003822415708059288, 'init_value': -2.9672679901123047, 'ave_value': -2.1503957378980076, 'soft_opc': nan} step=6640




2022-04-20 17:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.57 [info     ] FQE_20220420171629: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015535842941468022, 'time_algorithm_update': 0.0035006612180227257, 'loss': 0.036751209171937714, 'time_step': 0.00372539801769946, 'init_value': -3.1061248779296875, 'ave_value': -2.2385523113829024, 'soft_opc': nan} step=6806




2022-04-20 17:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.58 [info     ] FQE_20220420171629: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001559286232454231, 'time_algorithm_update': 0.0035876926169337996, 'loss': 0.038218858834154094, 'time_step': 0.0038197629423026578, 'init_value': -3.169898509979248, 'ave_value': -2.2643963442872037, 'soft_opc': nan} step=6972




2022-04-20 17:16.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.59 [info     ] FQE_20220420171629: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015700437936438136, 'time_algorithm_update': 0.003498029996113605, 'loss': 0.041424384309085516, 'time_step': 0.00372586767357516, 'init_value': -3.2811684608459473, 'ave_value': -2.300543145486783, 'soft_opc': nan} step=7138




2022-04-20 17:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.59 [info     ] FQE_20220420171629: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015536273818418203, 'time_algorithm_update': 0.0034828286573111294, 'loss': 0.04227479257065444, 'time_step': 0.003710693623646196, 'init_value': -3.4395060539245605, 'ave_value': -2.4853502377439796, 'soft_opc': nan} step=7304




2022-04-20 17:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.00 [info     ] FQE_20220420171629: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001582725938544216, 'time_algorithm_update': 0.0035857723419924817, 'loss': 0.04433959584924426, 'time_step': 0.003817391682820148, 'init_value': -3.4826877117156982, 'ave_value': -2.4728760971767443, 'soft_opc': nan} step=7470




2022-04-20 17:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.01 [info     ] FQE_20220420171629: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015608373894748917, 'time_algorithm_update': 0.003507411623575601, 'loss': 0.04738460483972028, 'time_step': 0.0037385828523750766, 'init_value': -3.543808937072754, 'ave_value': -2.5102332212257426, 'soft_opc': nan} step=7636




2022-04-20 17:17.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.02 [info     ] FQE_20220420171629: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015592000570641943, 'time_algorithm_update': 0.0034846957907619246, 'loss': 0.04732532783324482, 'time_step': 0.0037083826869367116, 'init_value': -3.624821424484253, 'ave_value': -2.561191385670624, 'soft_opc': nan} step=7802




2022-04-20 17:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.02 [info     ] FQE_20220420171629: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015915445534579726, 'time_algorithm_update': 0.0036192830786647566, 'loss': 0.04903827645080949, 'time_step': 0.0038514668682971633, 'init_value': -3.691361904144287, 'ave_value': -2.5769827365111726, 'soft_opc': nan} step=7968




2022-04-20 17:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.03 [info     ] FQE_20220420171629: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015762053340314384, 'time_algorithm_update': 0.003516230238489358, 'loss': 0.05257864441745073, 'time_step': 0.0037472118814307525, 'init_value': -3.746598482131958, 'ave_value': -2.5875138332207293, 'soft_opc': nan} step=8134




2022-04-20 17:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:17.04 [info     ] FQE_20220420171629: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015916881791080338, 'time_algorithm_update': 0.0036151294248649873, 'loss': 0.04831377408854349, 'time_step': 0.0038455796529011555, 'init_value': -3.7450153827667236, 'ave_value': -2.555291190826091, 'soft_opc': nan} step=8300




2022-04-20 17:17.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171629/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:17.04 [debug    ] RoundIterator is selected.
2022-04-20 17:17.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220420171704
2022-04-20 17:17.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:17.04 [debug    ] Building models...
2022-04-20 17:17.04 [debug    ] Models have been built.
2022-04-20 17:17.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420171704/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:17.06 [info     ] FQE_20220420171704: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001622972100280052, 'time_algorithm_update': 0.0035063860028289083, 'loss': 0.028359603656585828, 'time_step': 0.0037393847177194994, 'init_value': -1.459258794784546, 'ave_value': -1.4390953203579326, 'soft_opc': nan} step=344




2022-04-20 17:17.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.07 [info     ] FQE_20220420171704: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016389475312343863, 'time_algorithm_update': 0.003541928391123927, 'loss': 0.024608569847809715, 'time_step': 0.003778566454732141, 'init_value': -2.2435245513916016, 'ave_value': -2.223181978324512, 'soft_opc': nan} step=688




2022-04-20 17:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.09 [info     ] FQE_20220420171704: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001614710619283277, 'time_algorithm_update': 0.003505042126012403, 'loss': 0.028227273049915947, 'time_step': 0.0037376915299615196, 'init_value': -3.3115134239196777, 'ave_value': -3.331124118212107, 'soft_opc': nan} step=1032




2022-04-20 17:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.10 [info     ] FQE_20220420171704: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001633257366890131, 'time_algorithm_update': 0.003500651481539704, 'loss': 0.03137124740237067, 'time_step': 0.0037342649559641995, 'init_value': -3.9323575496673584, 'ave_value': -4.015468253973905, 'soft_opc': nan} step=1376




2022-04-20 17:17.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.11 [info     ] FQE_20220420171704: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001611051171324974, 'time_algorithm_update': 0.003523711548295132, 'loss': 0.03807101216773654, 'time_step': 0.0037555583687715753, 'init_value': -4.86580753326416, 'ave_value': -5.061422295224022, 'soft_opc': nan} step=1720




2022-04-20 17:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.13 [info     ] FQE_20220420171704: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015769309775773868, 'time_algorithm_update': 0.00349556706672491, 'loss': 0.043953671423328476, 'time_step': 0.0037255016870276873, 'init_value': -5.534605979919434, 'ave_value': -5.882077502667367, 'soft_opc': nan} step=2064




2022-04-20 17:17.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.14 [info     ] FQE_20220420171704: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001609166001164636, 'time_algorithm_update': 0.0034364371798759285, 'loss': 0.05413781310334195, 'time_step': 0.0036700139212053878, 'init_value': -6.466154098510742, 'ave_value': -6.899038154464047, 'soft_opc': nan} step=2408




2022-04-20 17:17.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.16 [info     ] FQE_20220420171704: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001632245474083479, 'time_algorithm_update': 0.003495082605716794, 'loss': 0.06485592276886712, 'time_step': 0.0037300059961718184, 'init_value': -7.056652069091797, 'ave_value': -7.644164417455862, 'soft_opc': nan} step=2752




2022-04-20 17:17.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.17 [info     ] FQE_20220420171704: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016472713891849963, 'time_algorithm_update': 0.003522062717482101, 'loss': 0.07520655658215197, 'time_step': 0.003760520108910494, 'init_value': -7.814305305480957, 'ave_value': -8.57574446938328, 'soft_opc': nan} step=3096




2022-04-20 17:17.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.18 [info     ] FQE_20220420171704: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016242265701293945, 'time_algorithm_update': 0.003491035727567451, 'loss': 0.0907589976762443, 'time_step': 0.003724948611370353, 'init_value': -8.57702350616455, 'ave_value': -9.46500293263444, 'soft_opc': nan} step=3440




2022-04-20 17:17.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.20 [info     ] FQE_20220420171704: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016592477643212608, 'time_algorithm_update': 0.004246659750162169, 'loss': 0.10052261920104366, 'time_step': 0.00448428614195003, 'init_value': -9.253750801086426, 'ave_value': -10.246514659811247, 'soft_opc': nan} step=3784




2022-04-20 17:17.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.22 [info     ] FQE_20220420171704: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016978660295175952, 'time_algorithm_update': 0.005076844331830047, 'loss': 0.11828446832247251, 'time_step': 0.005318272945492766, 'init_value': -10.220462799072266, 'ave_value': -11.212437599385577, 'soft_opc': nan} step=4128




2022-04-20 17:17.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.24 [info     ] FQE_20220420171704: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017065433568732682, 'time_algorithm_update': 0.005127309366714123, 'loss': 0.1349428540615495, 'time_step': 0.005372446636820949, 'init_value': -11.04945182800293, 'ave_value': -12.110204319040047, 'soft_opc': nan} step=4472




2022-04-20 17:17.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.26 [info     ] FQE_20220420171704: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001739921957947487, 'time_algorithm_update': 0.005090424487757129, 'loss': 0.15451720411629352, 'time_step': 0.005342460648958073, 'init_value': -12.37773323059082, 'ave_value': -13.486142912817497, 'soft_opc': nan} step=4816




2022-04-20 17:17.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.28 [info     ] FQE_20220420171704: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016744053641030955, 'time_algorithm_update': 0.004614902790202651, 'loss': 0.17894655447669847, 'time_step': 0.0048534398855165, 'init_value': -12.985294342041016, 'ave_value': -14.194922200251405, 'soft_opc': nan} step=5160




2022-04-20 17:17.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.30 [info     ] FQE_20220420171704: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016658389291097952, 'time_algorithm_update': 0.005049955706263698, 'loss': 0.19809233186073427, 'time_step': 0.005289729251418003, 'init_value': -13.469425201416016, 'ave_value': -14.793049586975426, 'soft_opc': nan} step=5504




2022-04-20 17:17.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.32 [info     ] FQE_20220420171704: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016942897508310717, 'time_algorithm_update': 0.005126016084537949, 'loss': 0.22245565336197615, 'time_step': 0.0053710154322690745, 'init_value': -14.256692886352539, 'ave_value': -15.640729817469513, 'soft_opc': nan} step=5848




2022-04-20 17:17.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.34 [info     ] FQE_20220420171704: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001557607983433923, 'time_algorithm_update': 0.0050204562586407325, 'loss': 0.24423779691244626, 'time_step': 0.005245656468147455, 'init_value': -14.566420555114746, 'ave_value': -16.170909278979106, 'soft_opc': nan} step=6192




2022-04-20 17:17.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.36 [info     ] FQE_20220420171704: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015202719111775243, 'time_algorithm_update': 0.004950390305629996, 'loss': 0.26723001092715665, 'time_step': 0.005169887182324431, 'init_value': -15.02462387084961, 'ave_value': -16.832965607573655, 'soft_opc': nan} step=6536




2022-04-20 17:17.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.38 [info     ] FQE_20220420171704: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001440727433492971, 'time_algorithm_update': 0.004388689301734747, 'loss': 0.28178037461543154, 'time_step': 0.0045991941939952765, 'init_value': -15.214255332946777, 'ave_value': -17.12474506895167, 'soft_opc': nan} step=6880




2022-04-20 17:17.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.39 [info     ] FQE_20220420171704: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015702358511991279, 'time_algorithm_update': 0.005006763824196749, 'loss': 0.298442957294715, 'time_step': 0.005230415006016576, 'init_value': -15.442041397094727, 'ave_value': -17.472151729722952, 'soft_opc': nan} step=7224




2022-04-20 17:17.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.41 [info     ] FQE_20220420171704: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016679458839948788, 'time_algorithm_update': 0.005120226117067559, 'loss': 0.3144525231815182, 'time_step': 0.005362927220588507, 'init_value': -16.011768341064453, 'ave_value': -18.425557436089257, 'soft_opc': nan} step=7568




2022-04-20 17:17.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.43 [info     ] FQE_20220420171704: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017250762429348257, 'time_algorithm_update': 0.005055681217548459, 'loss': 0.3334549683211155, 'time_step': 0.005305239627527636, 'init_value': -15.916338920593262, 'ave_value': -18.551086993278886, 'soft_opc': nan} step=7912




2022-04-20 17:17.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.45 [info     ] FQE_20220420171704: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016942897508310717, 'time_algorithm_update': 0.004631038322005161, 'loss': 0.35382724123422143, 'time_step': 0.004876685696978902, 'init_value': -16.233261108398438, 'ave_value': -19.14836884439663, 'soft_opc': nan} step=8256




2022-04-20 17:17.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.47 [info     ] FQE_20220420171704: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001724507919577665, 'time_algorithm_update': 0.005125304987264234, 'loss': 0.3587980209849775, 'time_step': 0.005374637454055076, 'init_value': -16.40354347229004, 'ave_value': -19.751982044559476, 'soft_opc': nan} step=8600




2022-04-20 17:17.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.49 [info     ] FQE_20220420171704: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017160801000373307, 'time_algorithm_update': 0.005108618459036184, 'loss': 0.37679748672472185, 'time_step': 0.005357022202292154, 'init_value': -16.625041961669922, 'ave_value': -20.40579865620648, 'soft_opc': nan} step=8944




2022-04-20 17:17.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.51 [info     ] FQE_20220420171704: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001699959122857382, 'time_algorithm_update': 0.0050054809381795485, 'loss': 0.3822801331231414, 'time_step': 0.005248177190159642, 'init_value': -16.852005004882812, 'ave_value': -20.88737481249043, 'soft_opc': nan} step=9288




2022-04-20 17:17.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.53 [info     ] FQE_20220420171704: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016665874525558118, 'time_algorithm_update': 0.0048969062261803205, 'loss': 0.39088540348809125, 'time_step': 0.005138034044310104, 'init_value': -17.524463653564453, 'ave_value': -21.833836335452222, 'soft_opc': nan} step=9632




2022-04-20 17:17.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.55 [info     ] FQE_20220420171704: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001668458761170853, 'time_algorithm_update': 0.004914260880891667, 'loss': 0.4051588489132565, 'time_step': 0.005158328039701595, 'init_value': -17.89879035949707, 'ave_value': -22.493508428401473, 'soft_opc': nan} step=9976




2022-04-20 17:17.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.57 [info     ] FQE_20220420171704: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016764360804890477, 'time_algorithm_update': 0.005026328009228373, 'loss': 0.4183880364641461, 'time_step': 0.005270272493362427, 'init_value': -18.64072036743164, 'ave_value': -23.47303206251414, 'soft_opc': nan} step=10320




2022-04-20 17:17.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.59 [info     ] FQE_20220420171704: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016797905744508256, 'time_algorithm_update': 0.0050920421300932415, 'loss': 0.43142685343966236, 'time_step': 0.005336992962415828, 'init_value': -18.87445640563965, 'ave_value': -23.733131257777714, 'soft_opc': nan} step=10664




2022-04-20 17:17.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.01 [info     ] FQE_20220420171704: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017083592193071231, 'time_algorithm_update': 0.00512442339298337, 'loss': 0.4400166012779918, 'time_step': 0.005371754252633383, 'init_value': -19.44150733947754, 'ave_value': -24.504076745471842, 'soft_opc': nan} step=11008




2022-04-20 17:18.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.03 [info     ] FQE_20220420171704: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016353781833205113, 'time_algorithm_update': 0.0046169633089109905, 'loss': 0.45376649465368585, 'time_step': 0.004858053007791209, 'init_value': -19.387781143188477, 'ave_value': -24.622155977760418, 'soft_opc': nan} step=11352




2022-04-20 17:18.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.05 [info     ] FQE_20220420171704: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016943937124207963, 'time_algorithm_update': 0.005068098389825156, 'loss': 0.4611706358358957, 'time_step': 0.005317023327184278, 'init_value': -19.769594192504883, 'ave_value': -25.010009412454057, 'soft_opc': nan} step=11696




2022-04-20 17:18.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.07 [info     ] FQE_20220420171704: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016903461411941882, 'time_algorithm_update': 0.005019878232201865, 'loss': 0.47969318143373657, 'time_step': 0.005262877358946689, 'init_value': -20.16238021850586, 'ave_value': -25.50999625782642, 'soft_opc': nan} step=12040




2022-04-20 17:18.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.09 [info     ] FQE_20220420171704: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016875599705895713, 'time_algorithm_update': 0.005008889492167983, 'loss': 0.5025599810489735, 'time_step': 0.0052531756633935976, 'init_value': -20.75838851928711, 'ave_value': -26.021653221695274, 'soft_opc': nan} step=12384




2022-04-20 17:18.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.11 [info     ] FQE_20220420171704: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016771222269812295, 'time_algorithm_update': 0.0048380306986875316, 'loss': 0.5084775988835581, 'time_step': 0.0050816376541936124, 'init_value': -20.815876007080078, 'ave_value': -26.283637986802034, 'soft_opc': nan} step=12728




2022-04-20 17:18.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.13 [info     ] FQE_20220420171704: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001546290031699247, 'time_algorithm_update': 0.0048461715842402255, 'loss': 0.5227667724681195, 'time_step': 0.005070665548014087, 'init_value': -20.909969329833984, 'ave_value': -26.54090780099971, 'soft_opc': nan} step=13072




2022-04-20 17:18.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.14 [info     ] FQE_20220420171704: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00014899567116138547, 'time_algorithm_update': 0.004965905533280484, 'loss': 0.537830775652384, 'time_step': 0.0051794488762700285, 'init_value': -21.491737365722656, 'ave_value': -27.199595223948663, 'soft_opc': nan} step=13416




2022-04-20 17:18.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.16 [info     ] FQE_20220420171704: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015120658763619356, 'time_algorithm_update': 0.0050217606300531435, 'loss': 0.5709282133110993, 'time_step': 0.005240410566329956, 'init_value': -21.931106567382812, 'ave_value': -27.56935081264541, 'soft_opc': nan} step=13760




2022-04-20 17:18.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.18 [info     ] FQE_20220420171704: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001643799072088197, 'time_algorithm_update': 0.005039719648139421, 'loss': 0.5855518587334299, 'time_step': 0.005278478528178016, 'init_value': -22.075550079345703, 'ave_value': -27.90125943149465, 'soft_opc': nan} step=14104




2022-04-20 17:18.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.20 [info     ] FQE_20220420171704: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016936937043833178, 'time_algorithm_update': 0.004715829394584478, 'loss': 0.6168689847122445, 'time_step': 0.0049592928830967395, 'init_value': -22.348392486572266, 'ave_value': -28.19884701088356, 'soft_opc': nan} step=14448




2022-04-20 17:18.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.22 [info     ] FQE_20220420171704: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017029116320055584, 'time_algorithm_update': 0.005033160364905069, 'loss': 0.627799512715577, 'time_step': 0.00528101033942644, 'init_value': -22.65755844116211, 'ave_value': -28.591598693912363, 'soft_opc': nan} step=14792




2022-04-20 17:18.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.24 [info     ] FQE_20220420171704: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001700485861578653, 'time_algorithm_update': 0.0050822711268136665, 'loss': 0.6367360570164787, 'time_step': 0.005327692558599073, 'init_value': -22.949731826782227, 'ave_value': -29.038023070240946, 'soft_opc': nan} step=15136




2022-04-20 17:18.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.26 [info     ] FQE_20220420171704: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016817519831102947, 'time_algorithm_update': 0.005110520956128142, 'loss': 0.6621079060822985, 'time_step': 0.005353729392206946, 'init_value': -23.03146743774414, 'ave_value': -29.38326274794904, 'soft_opc': nan} step=15480




2022-04-20 17:18.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.28 [info     ] FQE_20220420171704: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016906025797821755, 'time_algorithm_update': 0.00463971357012904, 'loss': 0.6661383104510605, 'time_step': 0.0048853962920432864, 'init_value': -23.20057487487793, 'ave_value': -29.614727313457443, 'soft_opc': nan} step=15824




2022-04-20 17:18.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.30 [info     ] FQE_20220420171704: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016611745191174885, 'time_algorithm_update': 0.005134129247000051, 'loss': 0.6861051107962551, 'time_step': 0.005374247944632242, 'init_value': -23.441730499267578, 'ave_value': -29.78000847631478, 'soft_opc': nan} step=16168




2022-04-20 17:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.32 [info     ] FQE_20220420171704: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016590121180512185, 'time_algorithm_update': 0.00503317907799122, 'loss': 0.6752048887279924, 'time_step': 0.005275573841361112, 'init_value': -23.244630813598633, 'ave_value': -29.657767286997387, 'soft_opc': nan} step=16512




2022-04-20 17:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.34 [info     ] FQE_20220420171704: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016811143520266511, 'time_algorithm_update': 0.005113295344419257, 'loss': 0.6742768393732, 'time_step': 0.00535705893538719, 'init_value': -23.046249389648438, 'ave_value': -29.60628407338785, 'soft_opc': nan} step=16856




2022-04-20 17:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:18.36 [info     ] FQE_20220420171704: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001709620619929114, 'time_algorithm_update': 0.005051508199336917, 'loss': 0.6889166223740768, 'time_step': 0.00529818618020346, 'init_value': -23.3588809967041, 'ave_value': -29.818066681668096, 'soft_opc': nan} step=17200




2022-04-20 17:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171704/model_17200.pt
search iteration:  22
using hyper params:  [0.000626941219511501, 0.008751273906651343, 4.667364601273149e-05, 5]
2022-04-20 17:18.36 [debug    ] RoundIterator is selected.
2022-04-20 17:18.36 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420171836
2022-04-20 17:18.36 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:18.36 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:18.36 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:18.36 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00062694121951

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.40 [info     ] TD3PlusBC_20220420171836: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00036475602646320184, 'time_algorithm_update': 0.008862945071437903, 'critic_loss': 5.548650389874887, 'actor_loss': 2.6602086365571496, 'time_step': 0.009307440958525004, 'td_error': 0.9065053467207623, 'init_value': -7.912957668304443, 'ave_value': -4.856759394220672} step=342
2022-04-20 17:18.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.44 [info     ] TD3PlusBC_20220420171836: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003702034030044288, 'time_algorithm_update': 0.00894256293425086, 'critic_loss': 3.8015435860868085, 'actor_loss': 2.5503118163660954, 'time_step': 0.009390343699538917, 'td_error': 1.0305058606832191, 'init_value': -11.381265640258789, 'ave_value': -7.0297184256900955} step=684
2022-04-20 17:18.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.47 [info     ] TD3PlusBC_20220420171836: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.000362457587705021, 'time_algorithm_update': 0.008399053623801783, 'critic_loss': 5.6283701072659404, 'actor_loss': 2.5381093792050904, 'time_step': 0.008838758134005363, 'td_error': 1.2233817074003965, 'init_value': -14.773547172546387, 'ave_value': -9.167670112005686} step=1026
2022-04-20 17:18.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.51 [info     ] TD3PlusBC_20220420171836: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003648438648870814, 'time_algorithm_update': 0.008869716995640806, 'critic_loss': 7.767530455923917, 'actor_loss': 2.531856294263873, 'time_step': 0.009314066485354775, 'td_error': 1.4977850258181147, 'init_value': -18.903833389282227, 'ave_value': -11.681667817766392} step=1368
2022-04-20 17:18.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.54 [info     ] TD3PlusBC_20220420171836: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003711396490621288, 'time_algorithm_update': 0.008813137199446472, 'critic_loss': 10.330086220077604, 'actor_loss': 2.528000084280271, 'time_step': 0.009260222228646975, 'td_error': 1.828131029694899, 'init_value': -22.509262084960938, 'ave_value': -14.042186275311485} step=1710
2022-04-20 17:18.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.58 [info     ] TD3PlusBC_20220420171836: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00036688994245919566, 'time_algorithm_update': 0.008869662619473641, 'critic_loss': 13.458213875865379, 'actor_loss': 2.5253926731689633, 'time_step': 0.009310995626170732, 'td_error': 2.052625572508407, 'init_value': -26.201141357421875, 'ave_value': -16.248127950171874} step=2052
2022-04-20 17:18.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.02 [info     ] TD3PlusBC_20220420171836: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003689485683775785, 'time_algorithm_update': 0.008847861959223161, 'critic_loss': 16.64771794575697, 'actor_loss': 2.5228359629536232, 'time_step': 0.009289410379197862, 'td_error': 2.4134876721536918, 'init_value': -29.398550033569336, 'ave_value': -18.325653297514528} step=2394
2022-04-20 17:19.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.05 [info     ] TD3PlusBC_20220420171836: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00036829396298057154, 'time_algorithm_update': 0.008446139898913645, 'critic_loss': 19.884073711975276, 'actor_loss': 2.521141230711463, 'time_step': 0.00888736415327641, 'td_error': 2.7150190157508867, 'init_value': -32.994346618652344, 'ave_value': -20.544402852219505} step=2736
2022-04-20 17:19.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.09 [info     ] TD3PlusBC_20220420171836: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003697955817507024, 'time_algorithm_update': 0.008847044225324664, 'critic_loss': 23.199956545355725, 'actor_loss': 2.5220591840688247, 'time_step': 0.009288908445347122, 'td_error': 3.043309062945103, 'init_value': -36.228736877441406, 'ave_value': -22.27083346475075} step=3078
2022-04-20 17:19.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.13 [info     ] TD3PlusBC_20220420171836: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00037208495781435605, 'time_algorithm_update': 0.008859763368528489, 'critic_loss': 26.66862104092425, 'actor_loss': 2.521137545680442, 'time_step': 0.009306691543400636, 'td_error': 3.468526222105517, 'init_value': -39.202796936035156, 'ave_value': -24.371844828417945} step=3420
2022-04-20 17:19.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.16 [info     ] TD3PlusBC_20220420171836: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00037041254210890386, 'time_algorithm_update': 0.008565356856898257, 'critic_loss': 30.36338072492365, 'actor_loss': 2.5187157887464378, 'time_step': 0.009011097121656987, 'td_error': 3.8382108140692086, 'init_value': -42.345176696777344, 'ave_value': -26.15598432166586} step=3762
2022-04-20 17:19.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.20 [info     ] TD3PlusBC_20220420171836: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00036928876798752455, 'time_algorithm_update': 0.008907383645487111, 'critic_loss': 33.95417511532878, 'actor_loss': 2.5187788413979155, 'time_step': 0.009348660184625993, 'td_error': 4.423971394249724, 'init_value': -45.218833923339844, 'ave_value': -28.06941804811518} step=4104
2022-04-20 17:19.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.24 [info     ] TD3PlusBC_20220420171836: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00037019573457059806, 'time_algorithm_update': 0.008473112569217794, 'critic_loss': 38.18873033467789, 'actor_loss': 2.517091479217797, 'time_step': 0.00891832789482429, 'td_error': 4.679418931007585, 'init_value': -46.64487838745117, 'ave_value': -29.698233264019578} step=4446
2022-04-20 17:19.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.27 [info     ] TD3PlusBC_20220420171836: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00036173675492493033, 'time_algorithm_update': 0.008856359281037984, 'critic_loss': 42.43871707247015, 'actor_loss': 2.5190753365120693, 'time_step': 0.009292079691301313, 'td_error': 5.161274496544487, 'init_value': -50.32744216918945, 'ave_value': -31.51846673962702} step=4788
2022-04-20 17:19.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.31 [info     ] TD3PlusBC_20220420171836: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003650118733010097, 'time_algorithm_update': 0.008874449116444727, 'critic_loss': 46.59081786417822, 'actor_loss': 2.5180772954260395, 'time_step': 0.009316876617788572, 'td_error': 5.393863402158158, 'init_value': -52.249908447265625, 'ave_value': -33.01282036002037} step=5130
2022-04-20 17:19.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.35 [info     ] TD3PlusBC_20220420171836: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003707834154541729, 'time_algorithm_update': 0.0085897027400502, 'critic_loss': 50.95372053335982, 'actor_loss': 2.518615340628819, 'time_step': 0.009036129678201955, 'td_error': 5.8417629065195085, 'init_value': -53.943397521972656, 'ave_value': -34.326382965644704} step=5472
2022-04-20 17:19.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.38 [info     ] TD3PlusBC_20220420171836: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00037659957394962424, 'time_algorithm_update': 0.00878405431557817, 'critic_loss': 55.33903599900809, 'actor_loss': 2.517302041862443, 'time_step': 0.009237283154537803, 'td_error': 6.274235343008309, 'init_value': -57.167572021484375, 'ave_value': -36.039429317738545} step=5814
2022-04-20 17:19.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.42 [info     ] TD3PlusBC_20220420171836: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003700653711954753, 'time_algorithm_update': 0.008419515096653275, 'critic_loss': 59.598502499318265, 'actor_loss': 2.517040460430391, 'time_step': 0.008866397260922438, 'td_error': 6.7449084375772745, 'init_value': -59.3736686706543, 'ave_value': -37.739234929372394} step=6156
2022-04-20 17:19.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.46 [info     ] TD3PlusBC_20220420171836: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003699740471198545, 'time_algorithm_update': 0.008897246673093205, 'critic_loss': 63.79185492532295, 'actor_loss': 2.5177033045138533, 'time_step': 0.009343631086293717, 'td_error': 7.163400028730705, 'init_value': -61.3038215637207, 'ave_value': -39.02290813372277} step=6498
2022-04-20 17:19.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.49 [info     ] TD3PlusBC_20220420171836: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003775135118361802, 'time_algorithm_update': 0.008921966915242156, 'critic_loss': 67.81713342387773, 'actor_loss': 2.517701510100337, 'time_step': 0.009374810241119207, 'td_error': 7.216920268202842, 'init_value': -61.27685546875, 'ave_value': -39.7540237443238} step=6840
2022-04-20 17:19.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.53 [info     ] TD3PlusBC_20220420171836: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003697488740173697, 'time_algorithm_update': 0.0085199625171416, 'critic_loss': 71.78787777995505, 'actor_loss': 2.516177283393012, 'time_step': 0.008969424063699287, 'td_error': 7.731607251372866, 'init_value': -65.03266143798828, 'ave_value': -41.30681271556715} step=7182
2022-04-20 17:19.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.57 [info     ] TD3PlusBC_20220420171836: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037554551286306996, 'time_algorithm_update': 0.008953278524833814, 'critic_loss': 75.5768719834891, 'actor_loss': 2.5190627379724155, 'time_step': 0.009408017348127755, 'td_error': 8.280426576755636, 'init_value': -66.09257507324219, 'ave_value': -42.59307663467738} step=7524
2022-04-20 17:19.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.00 [info     ] TD3PlusBC_20220420171836: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00036695198706018995, 'time_algorithm_update': 0.0086118735765156, 'critic_loss': 79.42325507269965, 'actor_loss': 2.5186116862715338, 'time_step': 0.009057953343753927, 'td_error': 8.51678094664109, 'init_value': -66.5393295288086, 'ave_value': -43.47633847614985} step=7866
2022-04-20 17:20.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.04 [info     ] TD3PlusBC_20220420171836: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003669931177507367, 'time_algorithm_update': 0.008937063272933515, 'critic_loss': 82.89438044676307, 'actor_loss': 2.5201974235780056, 'time_step': 0.009379824002583822, 'td_error': 8.829326188013093, 'init_value': -69.36904907226562, 'ave_value': -44.86008628032397} step=8208
2022-04-20 17:20.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.08 [info     ] TD3PlusBC_20220420171836: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003726621817427072, 'time_algorithm_update': 0.008969088744001779, 'critic_loss': 86.93520044025622, 'actor_loss': 2.519388539052149, 'time_step': 0.009418196148342557, 'td_error': 9.397760085413138, 'init_value': -70.09091186523438, 'ave_value': -45.832650086683735} step=8550
2022-04-20 17:20.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.11 [info     ] TD3PlusBC_20220420171836: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003688182050024557, 'time_algorithm_update': 0.008449200301142464, 'critic_loss': 90.14312290169342, 'actor_loss': 2.5216341116275007, 'time_step': 0.008894911286426567, 'td_error': 9.121319787963321, 'init_value': -71.26363372802734, 'ave_value': -46.840060942598726} step=8892
2022-04-20 17:20.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.15 [info     ] TD3PlusBC_20220420171836: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003707987523218345, 'time_algorithm_update': 0.008856141079238981, 'critic_loss': 93.56257969594141, 'actor_loss': 2.5211168423033596, 'time_step': 0.009304583421227529, 'td_error': 9.497497693894575, 'init_value': -72.02073669433594, 'ave_value': -47.966081519413756} step=9234
2022-04-20 17:20.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.19 [info     ] TD3PlusBC_20220420171836: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036517291041145546, 'time_algorithm_update': 0.00830870971345065, 'critic_loss': 97.64055672584222, 'actor_loss': 2.5209196952351354, 'time_step': 0.008752511258710894, 'td_error': 10.04658421459234, 'init_value': -73.69979095458984, 'ave_value': -48.5903712904908} step=9576
2022-04-20 17:20.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.22 [info     ] TD3PlusBC_20220420171836: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003742035369426883, 'time_algorithm_update': 0.00890000591501158, 'critic_loss': 100.60355680588393, 'actor_loss': 2.521145350751821, 'time_step': 0.00934319119704397, 'td_error': 9.991620052060883, 'init_value': -73.79060363769531, 'ave_value': -49.30959003557493} step=9918
2022-04-20 17:20.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.26 [info     ] TD3PlusBC_20220420171836: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00036467097656071537, 'time_algorithm_update': 0.008864279378924454, 'critic_loss': 103.74541932936998, 'actor_loss': 2.5233081600122285, 'time_step': 0.009296844577231603, 'td_error': 10.425704113535689, 'init_value': -74.68728637695312, 'ave_value': -50.27989409882105} step=10260
2022-04-20 17:20.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.30 [info     ] TD3PlusBC_20220420171836: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00036709141312983996, 'time_algorithm_update': 0.008466119654694496, 'critic_loss': 106.90539792824907, 'actor_loss': 2.5213104563149793, 'time_step': 0.008895220115170842, 'td_error': 11.221571746799485, 'init_value': -75.93977355957031, 'ave_value': -50.952647187054694} step=10602
2022-04-20 17:20.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.33 [info     ] TD3PlusBC_20220420171836: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00036893741429200647, 'time_algorithm_update': 0.008838592914112826, 'critic_loss': 110.03723068683468, 'actor_loss': 2.52266428205702, 'time_step': 0.009269988327695612, 'td_error': 11.650422601130849, 'init_value': -76.48866271972656, 'ave_value': -52.04810819584591} step=10944
2022-04-20 17:20.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.37 [info     ] TD3PlusBC_20220420171836: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00036736747674774703, 'time_algorithm_update': 0.008495318959330955, 'critic_loss': 112.83984845702412, 'actor_loss': 2.523727007079543, 'time_step': 0.008923341656288905, 'td_error': 11.05979359292838, 'init_value': -76.4059066772461, 'ave_value': -52.5123645821655} step=11286
2022-04-20 17:20.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.41 [info     ] TD3PlusBC_20220420171836: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003695383406521981, 'time_algorithm_update': 0.008848116411800272, 'critic_loss': 115.36465947112144, 'actor_loss': 2.5234998220588727, 'time_step': 0.009281406625669602, 'td_error': 11.702629488508075, 'init_value': -77.71808624267578, 'ave_value': -53.01736658306826} step=11628
2022-04-20 17:20.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.44 [info     ] TD3PlusBC_20220420171836: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00036563231931095237, 'time_algorithm_update': 0.008850241962232087, 'critic_loss': 117.94231914497955, 'actor_loss': 2.523720819350572, 'time_step': 0.009281751705191986, 'td_error': 12.9523300021592, 'init_value': -79.71174621582031, 'ave_value': -54.089612983306615} step=11970
2022-04-20 17:20.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.48 [info     ] TD3PlusBC_20220420171836: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036234186406721146, 'time_algorithm_update': 0.008513251243278994, 'critic_loss': 120.6214757328145, 'actor_loss': 2.5238341211575515, 'time_step': 0.00893731214846784, 'td_error': 11.723644493320947, 'init_value': -79.17829132080078, 'ave_value': -54.525068665203044} step=12312
2022-04-20 17:20.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.52 [info     ] TD3PlusBC_20220420171836: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003682333126402738, 'time_algorithm_update': 0.00895177760319403, 'critic_loss': 123.0385011594895, 'actor_loss': 2.52426871082239, 'time_step': 0.009379157545970894, 'td_error': 12.28777099004187, 'init_value': -78.70999908447266, 'ave_value': -55.02837438306016} step=12654
2022-04-20 17:20.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.55 [info     ] TD3PlusBC_20220420171836: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003731829381128501, 'time_algorithm_update': 0.00846634134214524, 'critic_loss': 125.12389652631437, 'actor_loss': 2.5257156355339183, 'time_step': 0.008902505127310056, 'td_error': 12.783310335429636, 'init_value': -78.60153198242188, 'ave_value': -55.349451627509985} step=12996
2022-04-20 17:20.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.59 [info     ] TD3PlusBC_20220420171836: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003743743338780096, 'time_algorithm_update': 0.008847203171044066, 'critic_loss': 127.70667777702822, 'actor_loss': 2.5254718128003573, 'time_step': 0.009288370260718272, 'td_error': 12.853572639785199, 'init_value': -80.13188171386719, 'ave_value': -56.21201495359291} step=13338
2022-04-20 17:20.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.03 [info     ] TD3PlusBC_20220420171836: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037077923267208343, 'time_algorithm_update': 0.008919550661455122, 'critic_loss': 129.8174523247613, 'actor_loss': 2.5263340626543727, 'time_step': 0.009353344900566236, 'td_error': 12.449430168463763, 'init_value': -79.74771881103516, 'ave_value': -56.468866135752386} step=13680
2022-04-20 17:21.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.06 [info     ] TD3PlusBC_20220420171836: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003666027247557166, 'time_algorithm_update': 0.008304463492499458, 'critic_loss': 132.16681775990983, 'actor_loss': 2.5251420227407713, 'time_step': 0.00873607919927229, 'td_error': 12.67510364918051, 'init_value': -80.13062286376953, 'ave_value': -57.11893248921931} step=14022
2022-04-20 17:21.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.10 [info     ] TD3PlusBC_20220420171836: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037091796161138526, 'time_algorithm_update': 0.008951233144392046, 'critic_loss': 133.83303230687193, 'actor_loss': 2.525410912887395, 'time_step': 0.009391170496131942, 'td_error': 13.043663293080455, 'init_value': -82.55250549316406, 'ave_value': -58.17109224236931} step=14364
2022-04-20 17:21.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.14 [info     ] TD3PlusBC_20220420171836: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000368122468914902, 'time_algorithm_update': 0.008447916187040987, 'critic_loss': 136.07537377786915, 'actor_loss': 2.5266707110823248, 'time_step': 0.008876409446984007, 'td_error': 13.306258361614345, 'init_value': -81.91275787353516, 'ave_value': -58.33450443995311} step=14706
2022-04-20 17:21.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.17 [info     ] TD3PlusBC_20220420171836: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00036951045543826813, 'time_algorithm_update': 0.008935951350028054, 'critic_loss': 137.3646957665159, 'actor_loss': 2.5262444827988833, 'time_step': 0.009368635060494407, 'td_error': 12.688720528518502, 'init_value': -81.09970092773438, 'ave_value': -58.73647477211296} step=15048
2022-04-20 17:21.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.21 [info     ] TD3PlusBC_20220420171836: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003703783827218396, 'time_algorithm_update': 0.008917063300372565, 'critic_loss': 139.41951008846885, 'actor_loss': 2.5278715376268353, 'time_step': 0.009355089817827905, 'td_error': 13.851814802024169, 'init_value': -83.37447357177734, 'ave_value': -59.50327724825238} step=15390
2022-04-20 17:21.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.25 [info     ] TD3PlusBC_20220420171836: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003684215378343013, 'time_algorithm_update': 0.008439991906372428, 'critic_loss': 140.4239944101077, 'actor_loss': 2.5281169261151586, 'time_step': 0.008876080401459633, 'td_error': 14.284380586266852, 'init_value': -83.82142639160156, 'ave_value': -59.98319259441399} step=15732
2022-04-20 17:21.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.28 [info     ] TD3PlusBC_20220420171836: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.000369153524699964, 'time_algorithm_update': 0.008808722272951004, 'critic_loss': 141.9733789455124, 'actor_loss': 2.5290252060918084, 'time_step': 0.009240481588575575, 'td_error': 14.382966426276559, 'init_value': -82.51434326171875, 'ave_value': -60.04287085853483} step=16074
2022-04-20 17:21.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.32 [info     ] TD3PlusBC_20220420171836: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00037051362600940013, 'time_algorithm_update': 0.008482841720357973, 'critic_loss': 143.14373984532048, 'actor_loss': 2.528691641768517, 'time_step': 0.008916996375859132, 'td_error': 13.974029903309, 'init_value': -83.64564514160156, 'ave_value': -60.79999148426024} step=16416
2022-04-20 17:21.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.36 [info     ] TD3PlusBC_20220420171836: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003722703944869906, 'time_algorithm_update': 0.008854688259593228, 'critic_loss': 144.73339107580352, 'actor_loss': 2.5287562657517997, 'time_step': 0.009292234454238624, 'td_error': 14.00742968664913, 'init_value': -81.22103118896484, 'ave_value': -60.523626197263226} step=16758
2022-04-20 17:21.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.39 [info     ] TD3PlusBC_20220420171836: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037069139424820394, 'time_algorithm_update': 0.008945352152774208, 'critic_loss': 145.92781923528304, 'actor_loss': 2.5298362614815697, 'time_step': 0.009376781725744058, 'td_error': 14.105233241612583, 'init_value': -80.4913558959961, 'ave_value': -60.663871236706555} step=17100
2022-04-20 17:21.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420171836/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:21.40 [info     ] FQE_20220420172139: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016382947025528872, 'time_algorithm_update': 0.004799291311976421, 'loss': 0.0055218602275386094, 'time_step': 0.00503842945558479, 'init_value': -0.1808084100484848, 'ave_value': -0.15261480915486006, 'soft_opc': nan} step=166




2022-04-20 17:21.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.41 [info     ] FQE_20220420172139: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001640693250908909, 'time_algorithm_update': 0.004682041076292475, 'loss': 0.0040616401659145235, 'time_step': 0.004921562700386507, 'init_value': -0.33090561628341675, 'ave_value': -0.24751722346487884, 'soft_opc': nan} step=332




2022-04-20 17:21.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.42 [info     ] FQE_20220420172139: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016713573271969715, 'time_algorithm_update': 0.005061413868364081, 'loss': 0.0037142028253671364, 'time_step': 0.005300355244831866, 'init_value': -0.4152991473674774, 'ave_value': -0.3169186005628928, 'soft_opc': nan} step=498




2022-04-20 17:21.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.43 [info     ] FQE_20220420172139: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001688793481114399, 'time_algorithm_update': 0.00497870847403285, 'loss': 0.0037278844385845475, 'time_step': 0.005220615720174399, 'init_value': -0.49002137780189514, 'ave_value': -0.34839582984075623, 'soft_opc': nan} step=664




2022-04-20 17:21.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.44 [info     ] FQE_20220420172139: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016610450055225785, 'time_algorithm_update': 0.004985425845686212, 'loss': 0.003454365052215486, 'time_step': 0.0052246472921716166, 'init_value': -0.574772834777832, 'ave_value': -0.3971092489791346, 'soft_opc': nan} step=830




2022-04-20 17:21.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.45 [info     ] FQE_20220420172139: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001692068145935794, 'time_algorithm_update': 0.005047704800065741, 'loss': 0.0033680866360215537, 'time_step': 0.00529193734548178, 'init_value': -0.618288516998291, 'ave_value': -0.42296745421717297, 'soft_opc': nan} step=996




2022-04-20 17:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.46 [info     ] FQE_20220420172139: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.000171006443988846, 'time_algorithm_update': 0.005140716771045363, 'loss': 0.003369066240097654, 'time_step': 0.005385227950222521, 'init_value': -0.6612070798873901, 'ave_value': -0.4234900057265485, 'soft_opc': nan} step=1162




2022-04-20 17:21.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.47 [info     ] FQE_20220420172139: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001665454312979457, 'time_algorithm_update': 0.005077146622071783, 'loss': 0.003269530325028372, 'time_step': 0.005314623016908944, 'init_value': -0.818895161151886, 'ave_value': -0.5463559842794328, 'soft_opc': nan} step=1328




2022-04-20 17:21.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.48 [info     ] FQE_20220420172139: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016793572759053795, 'time_algorithm_update': 0.005115622497466673, 'loss': 0.0032718998235538154, 'time_step': 0.005357835666242853, 'init_value': -0.8803368806838989, 'ave_value': -0.5780106800499263, 'soft_opc': nan} step=1494




2022-04-20 17:21.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.49 [info     ] FQE_20220420172139: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016869837979236282, 'time_algorithm_update': 0.004985618304057294, 'loss': 0.003227383171836565, 'time_step': 0.0052276619945664, 'init_value': -1.0223922729492188, 'ave_value': -0.6636623649111202, 'soft_opc': nan} step=1660




2022-04-20 17:21.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.50 [info     ] FQE_20220420172139: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016555728682552474, 'time_algorithm_update': 0.004351213753941548, 'loss': 0.0032332490243197204, 'time_step': 0.00458801223571042, 'init_value': -1.1324042081832886, 'ave_value': -0.7275651853649063, 'soft_opc': nan} step=1826




2022-04-20 17:21.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.51 [info     ] FQE_20220420172139: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001646380826651332, 'time_algorithm_update': 0.005061718354742211, 'loss': 0.003195664644836033, 'time_step': 0.0052984507687120555, 'init_value': -1.1926164627075195, 'ave_value': -0.7564081353026333, 'soft_opc': nan} step=1992




2022-04-20 17:21.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.52 [info     ] FQE_20220420172139: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016455047101859586, 'time_algorithm_update': 0.005097007176962243, 'loss': 0.0033429308250232273, 'time_step': 0.005334266697067812, 'init_value': -1.2930471897125244, 'ave_value': -0.817807677695276, 'soft_opc': nan} step=2158




2022-04-20 17:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.53 [info     ] FQE_20220420172139: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016647218221641448, 'time_algorithm_update': 0.005071842526815024, 'loss': 0.0033545778062845388, 'time_step': 0.005311575280614646, 'init_value': -1.406836986541748, 'ave_value': -0.8858954131032701, 'soft_opc': nan} step=2324




2022-04-20 17:21.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.54 [info     ] FQE_20220420172139: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016662586166197994, 'time_algorithm_update': 0.005108739956315741, 'loss': 0.0034233110273784273, 'time_step': 0.005349834281277944, 'init_value': -1.4553945064544678, 'ave_value': -0.9005960942455777, 'soft_opc': nan} step=2490




2022-04-20 17:21.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.55 [info     ] FQE_20220420172139: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016640467816088573, 'time_algorithm_update': 0.005077247160026826, 'loss': 0.003559770694434508, 'time_step': 0.005318151899130948, 'init_value': -1.5619094371795654, 'ave_value': -0.9589275516569614, 'soft_opc': nan} step=2656




2022-04-20 17:21.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.56 [info     ] FQE_20220420172139: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001656736236020743, 'time_algorithm_update': 0.0050106795437364696, 'loss': 0.0036581356680800534, 'time_step': 0.005246988262038633, 'init_value': -1.6788344383239746, 'ave_value': -1.0298340940730528, 'soft_opc': nan} step=2822




2022-04-20 17:21.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.57 [info     ] FQE_20220420172139: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016591204218117586, 'time_algorithm_update': 0.005052817873207919, 'loss': 0.003712596313562244, 'time_step': 0.005295157432556152, 'init_value': -1.744290828704834, 'ave_value': -1.0617840049586988, 'soft_opc': nan} step=2988




2022-04-20 17:21.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.58 [info     ] FQE_20220420172139: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016405209001288357, 'time_algorithm_update': 0.0049650008419910106, 'loss': 0.0041321811637229635, 'time_step': 0.005201549415128777, 'init_value': -1.8388985395431519, 'ave_value': -1.1261662795871228, 'soft_opc': nan} step=3154




2022-04-20 17:21.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.58 [info     ] FQE_20220420172139: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016333539801907827, 'time_algorithm_update': 0.0040925468306943595, 'loss': 0.004396707103559635, 'time_step': 0.004326212837035398, 'init_value': -1.9254975318908691, 'ave_value': -1.202689701348946, 'soft_opc': nan} step=3320




2022-04-20 17:21.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:21.59 [info     ] FQE_20220420172139: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016801328544157097, 'time_algorithm_update': 0.005004016749830131, 'loss': 0.004645757426238473, 'time_step': 0.0052455620593335256, 'init_value': -1.9482171535491943, 'ave_value': -1.1875736621653175, 'soft_opc': nan} step=3486




2022-04-20 17:21.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.00 [info     ] FQE_20220420172139: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016580001417412814, 'time_algorithm_update': 0.005052993096500994, 'loss': 0.005017375090672825, 'time_step': 0.005291839680039739, 'init_value': -2.116584300994873, 'ave_value': -1.3016132305737014, 'soft_opc': nan} step=3652




2022-04-20 17:22.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.01 [info     ] FQE_20220420172139: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016635010041386248, 'time_algorithm_update': 0.004967137991663921, 'loss': 0.005134828278937949, 'time_step': 0.005209378449313612, 'init_value': -2.1980440616607666, 'ave_value': -1.3500883748603834, 'soft_opc': nan} step=3818




2022-04-20 17:22.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.02 [info     ] FQE_20220420172139: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001624061400631824, 'time_algorithm_update': 0.005117347441523908, 'loss': 0.0053284404664693105, 'time_step': 0.005351333733064583, 'init_value': -2.2533211708068848, 'ave_value': -1.3636538019084985, 'soft_opc': nan} step=3984




2022-04-20 17:22.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.03 [info     ] FQE_20220420172139: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016574256391410367, 'time_algorithm_update': 0.005051986280694066, 'loss': 0.005670824146363884, 'time_step': 0.0052897858332438645, 'init_value': -2.299520969390869, 'ave_value': -1.3776330035146291, 'soft_opc': nan} step=4150




2022-04-20 17:22.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.04 [info     ] FQE_20220420172139: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016836660454072147, 'time_algorithm_update': 0.005126632839800364, 'loss': 0.006061182179241385, 'time_step': 0.00536559863262866, 'init_value': -2.4334311485290527, 'ave_value': -1.4922199692560343, 'soft_opc': nan} step=4316




2022-04-20 17:22.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.05 [info     ] FQE_20220420172139: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016441402665103776, 'time_algorithm_update': 0.005007545632052134, 'loss': 0.0061916138416475966, 'time_step': 0.005243380385709096, 'init_value': -2.4891815185546875, 'ave_value': -1.5181697044555131, 'soft_opc': nan} step=4482




2022-04-20 17:22.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.06 [info     ] FQE_20220420172139: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016508044966732162, 'time_algorithm_update': 0.005116579044296081, 'loss': 0.006737902260130182, 'time_step': 0.005355228860694242, 'init_value': -2.618821144104004, 'ave_value': -1.6007371137338178, 'soft_opc': nan} step=4648




2022-04-20 17:22.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.07 [info     ] FQE_20220420172139: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001636556832187147, 'time_algorithm_update': 0.004022305270275438, 'loss': 0.007064011000189369, 'time_step': 0.004258622606116605, 'init_value': -2.717221736907959, 'ave_value': -1.647812458241845, 'soft_opc': nan} step=4814




2022-04-20 17:22.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.08 [info     ] FQE_20220420172139: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016411815781191172, 'time_algorithm_update': 0.005049785935735128, 'loss': 0.007748817423280014, 'time_step': 0.0052876788449574665, 'init_value': -2.7424702644348145, 'ave_value': -1.6697728425398604, 'soft_opc': nan} step=4980




2022-04-20 17:22.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.09 [info     ] FQE_20220420172139: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016864523830184018, 'time_algorithm_update': 0.004991450941706279, 'loss': 0.008186654046778175, 'time_step': 0.005233348134052323, 'init_value': -2.9133081436157227, 'ave_value': -1.778648470649244, 'soft_opc': nan} step=5146




2022-04-20 17:22.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.10 [info     ] FQE_20220420172139: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016603268772722726, 'time_algorithm_update': 0.005093468240944736, 'loss': 0.008298841972711933, 'time_step': 0.005331611058798181, 'init_value': -2.9947381019592285, 'ave_value': -1.8363755266358321, 'soft_opc': nan} step=5312




2022-04-20 17:22.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.11 [info     ] FQE_20220420172139: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00017089728849479952, 'time_algorithm_update': 0.005062930555228728, 'loss': 0.008691663575759831, 'time_step': 0.005308547651911357, 'init_value': -3.0297865867614746, 'ave_value': -1.8438344432024267, 'soft_opc': nan} step=5478




2022-04-20 17:22.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.12 [info     ] FQE_20220420172139: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001702050128615046, 'time_algorithm_update': 0.005012625671294798, 'loss': 0.009166128046990159, 'time_step': 0.005256721772343279, 'init_value': -3.1245946884155273, 'ave_value': -1.900363475720289, 'soft_opc': nan} step=5644




2022-04-20 17:22.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.13 [info     ] FQE_20220420172139: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.000166877206549587, 'time_algorithm_update': 0.005026617682123759, 'loss': 0.009654513235184285, 'time_step': 0.005268173045422657, 'init_value': -3.1871554851531982, 'ave_value': -1.9441947377110655, 'soft_opc': nan} step=5810




2022-04-20 17:22.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.14 [info     ] FQE_20220420172139: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016419427940644413, 'time_algorithm_update': 0.004970958433955549, 'loss': 0.009705811962331027, 'time_step': 0.005209252058741558, 'init_value': -3.2620785236358643, 'ave_value': -1.997383572604205, 'soft_opc': nan} step=5976




2022-04-20 17:22.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.15 [info     ] FQE_20220420172139: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016358674290668532, 'time_algorithm_update': 0.004969796502446553, 'loss': 0.010020925865735835, 'time_step': 0.005205772009240575, 'init_value': -3.39231538772583, 'ave_value': -2.072961948385781, 'soft_opc': nan} step=6142




2022-04-20 17:22.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.15 [info     ] FQE_20220420172139: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016665315053549158, 'time_algorithm_update': 0.004146771258618458, 'loss': 0.010701041888860783, 'time_step': 0.004384206002017102, 'init_value': -3.522561550140381, 'ave_value': -2.1662411260175274, 'soft_opc': nan} step=6308




2022-04-20 17:22.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.16 [info     ] FQE_20220420172139: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016554292426051865, 'time_algorithm_update': 0.005026262926768108, 'loss': 0.011445643951490813, 'time_step': 0.005268017929720591, 'init_value': -3.598668336868286, 'ave_value': -2.2172200888160383, 'soft_opc': nan} step=6474




2022-04-20 17:22.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.17 [info     ] FQE_20220420172139: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016612317188676582, 'time_algorithm_update': 0.005042186702590391, 'loss': 0.012608969787483564, 'time_step': 0.005283815314970821, 'init_value': -3.733886957168579, 'ave_value': -2.31402528297391, 'soft_opc': nan} step=6640




2022-04-20 17:22.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.18 [info     ] FQE_20220420172139: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016674219843852952, 'time_algorithm_update': 0.0049917425017759025, 'loss': 0.012625011019703248, 'time_step': 0.0052314350403935075, 'init_value': -3.78192400932312, 'ave_value': -2.3296552923684186, 'soft_opc': nan} step=6806




2022-04-20 17:22.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.19 [info     ] FQE_20220420172139: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016608726547425053, 'time_algorithm_update': 0.005061474191137107, 'loss': 0.01342440635514311, 'time_step': 0.005296042166560529, 'init_value': -3.8259215354919434, 'ave_value': -2.341016437885192, 'soft_opc': nan} step=6972




2022-04-20 17:22.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.20 [info     ] FQE_20220420172139: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.000168405383466238, 'time_algorithm_update': 0.005042568746819554, 'loss': 0.013833462168793586, 'time_step': 0.005283542426235704, 'init_value': -3.934818744659424, 'ave_value': -2.411855064104269, 'soft_opc': nan} step=7138




2022-04-20 17:22.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.21 [info     ] FQE_20220420172139: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001653432846069336, 'time_algorithm_update': 0.005106783774961908, 'loss': 0.014343292802342224, 'time_step': 0.00534710108515728, 'init_value': -4.028388977050781, 'ave_value': -2.508908056447635, 'soft_opc': nan} step=7304




2022-04-20 17:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.22 [info     ] FQE_20220420172139: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016801903046757342, 'time_algorithm_update': 0.004958303577928658, 'loss': 0.015230200301115233, 'time_step': 0.0052002855094082385, 'init_value': -4.0699462890625, 'ave_value': -2.5293878095308404, 'soft_opc': nan} step=7470




2022-04-20 17:22.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.23 [info     ] FQE_20220420172139: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001667407621820289, 'time_algorithm_update': 0.005045563341623329, 'loss': 0.01634102986693517, 'time_step': 0.005285060549356851, 'init_value': -4.111642360687256, 'ave_value': -2.559736724009922, 'soft_opc': nan} step=7636




2022-04-20 17:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.24 [info     ] FQE_20220420172139: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001652470554213926, 'time_algorithm_update': 0.0044582981661141635, 'loss': 0.016290801552023334, 'time_step': 0.0046938514134970055, 'init_value': -4.2144694328308105, 'ave_value': -2.6186514988541605, 'soft_opc': nan} step=7802




2022-04-20 17:22.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.25 [info     ] FQE_20220420172139: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001678969486650214, 'time_algorithm_update': 0.004947129502353898, 'loss': 0.016748035788754875, 'time_step': 0.005186411271612328, 'init_value': -4.316451072692871, 'ave_value': -2.673037017505985, 'soft_opc': nan} step=7968




2022-04-20 17:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.26 [info     ] FQE_20220420172139: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016497560294277696, 'time_algorithm_update': 0.005071563893053905, 'loss': 0.016815021075869362, 'time_step': 0.005310015506054981, 'init_value': -4.346370697021484, 'ave_value': -2.6685787334467643, 'soft_opc': nan} step=8134




2022-04-20 17:22.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:22.27 [info     ] FQE_20220420172139: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001709820276283356, 'time_algorithm_update': 0.005086736506726368, 'loss': 0.01709184925708673, 'time_step': 0.005334138870239258, 'init_value': -4.391106605529785, 'ave_value': -2.6813051768318488, 'soft_opc': nan} step=8300




2022-04-20 17:22.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172139/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:22.27 [info     ] Directory is created at d3rlpy_logs/FQE_20220420172227
2022-04-20 17:22.27 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:22.27 [debug    ] Building models...
2022-04-20 17:22.27 [debug    ] Models have been built.
2022-04-20 17:22.27 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420172227/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:22.29 [info     ] FQE_20220420172227: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001666948828898685, 'time_algorithm_update': 0.004941178711367325, 'loss': 0.029088208032116085, 'time_step': 0.005181593962118659, 'init_value': -1.3140581846237183, 'ave_value': -1.2490674373189148, 'soft_opc': nan} step=355




2022-04-20 17:22.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.31 [info     ] FQE_20220420172227: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016946322481397173, 'time_algorithm_update': 0.0051743789457939044, 'loss': 0.028177830760537738, 'time_step': 0.00541962637028224, 'init_value': -2.400247097015381, 'ave_value': -2.2839305497197726, 'soft_opc': nan} step=710




2022-04-20 17:22.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.33 [info     ] FQE_20220420172227: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016608506860867352, 'time_algorithm_update': 0.0045817710983921105, 'loss': 0.0325594212020367, 'time_step': 0.004819332042210539, 'init_value': -3.1122632026672363, 'ave_value': -2.905236405862958, 'soft_opc': nan} step=1065




2022-04-20 17:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.35 [info     ] FQE_20220420172227: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016887490178497743, 'time_algorithm_update': 0.005136356891041071, 'loss': 0.03752217499250677, 'time_step': 0.005383695683009188, 'init_value': -4.071652889251709, 'ave_value': -3.7691218978803285, 'soft_opc': nan} step=1420




2022-04-20 17:22.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.37 [info     ] FQE_20220420172227: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001675350565305898, 'time_algorithm_update': 0.004960772688959686, 'loss': 0.042519458220668244, 'time_step': 0.005203772262788155, 'init_value': -4.701374530792236, 'ave_value': -4.335019950156353, 'soft_opc': nan} step=1775




2022-04-20 17:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.39 [info     ] FQE_20220420172227: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.0001699219287281305, 'time_algorithm_update': 0.005044557678867394, 'loss': 0.05193112984194722, 'time_step': 0.005291803118208764, 'init_value': -5.543394565582275, 'ave_value': -5.05819687422178, 'soft_opc': nan} step=2130




2022-04-20 17:22.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.41 [info     ] FQE_20220420172227: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.0001705599502778389, 'time_algorithm_update': 0.004984878486310932, 'loss': 0.0605359344074214, 'time_step': 0.005231280394003425, 'init_value': -6.170967102050781, 'ave_value': -5.652099207147208, 'soft_opc': nan} step=2485




2022-04-20 17:22.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.43 [info     ] FQE_20220420172227: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016927719116210938, 'time_algorithm_update': 0.004701445807873363, 'loss': 0.07317770695654859, 'time_step': 0.004945078702040122, 'init_value': -6.841751575469971, 'ave_value': -6.275825110035667, 'soft_opc': nan} step=2840




2022-04-20 17:22.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.45 [info     ] FQE_20220420172227: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00017083463534502916, 'time_algorithm_update': 0.005092542272218516, 'loss': 0.08326717073386404, 'time_step': 0.0053366049914292885, 'init_value': -7.316387176513672, 'ave_value': -6.7772207719977455, 'soft_opc': nan} step=3195




2022-04-20 17:22.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.47 [info     ] FQE_20220420172227: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016807569584376375, 'time_algorithm_update': 0.004993463569963482, 'loss': 0.09609220767734757, 'time_step': 0.00523562834296428, 'init_value': -7.993635654449463, 'ave_value': -7.481768109562161, 'soft_opc': nan} step=3550




2022-04-20 17:22.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.49 [info     ] FQE_20220420172227: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016845985197685135, 'time_algorithm_update': 0.005024376721449301, 'loss': 0.11208139601832544, 'time_step': 0.005267511287205656, 'init_value': -8.75005054473877, 'ave_value': -8.297202564068282, 'soft_opc': nan} step=3905




2022-04-20 17:22.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.51 [info     ] FQE_20220420172227: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00016690643740371918, 'time_algorithm_update': 0.004611572749178175, 'loss': 0.1277090413836946, 'time_step': 0.004849809324237662, 'init_value': -8.948187828063965, 'ave_value': -8.534756263448081, 'soft_opc': nan} step=4260




2022-04-20 17:22.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.53 [info     ] FQE_20220420172227: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016990648189061123, 'time_algorithm_update': 0.005109869594305334, 'loss': 0.14089707154255937, 'time_step': 0.0053568598250268214, 'init_value': -9.473284721374512, 'ave_value': -9.222776762293496, 'soft_opc': nan} step=4615




2022-04-20 17:22.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.55 [info     ] FQE_20220420172227: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00017036317099987621, 'time_algorithm_update': 0.00498532107178594, 'loss': 0.1599584410465519, 'time_step': 0.005230465741224692, 'init_value': -9.832234382629395, 'ave_value': -9.728015040626397, 'soft_opc': nan} step=4970




2022-04-20 17:22.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.57 [info     ] FQE_20220420172227: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001705055505457059, 'time_algorithm_update': 0.005056318095032598, 'loss': 0.17527888100856626, 'time_step': 0.005303933586872799, 'init_value': -10.125393867492676, 'ave_value': -10.20033376725484, 'soft_opc': nan} step=5325




2022-04-20 17:22.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:22.59 [info     ] FQE_20220420172227: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016916369048642442, 'time_algorithm_update': 0.004808707976005446, 'loss': 0.18534632285820765, 'time_step': 0.005051706878232284, 'init_value': -10.271084785461426, 'ave_value': -10.515230412636926, 'soft_opc': nan} step=5680




2022-04-20 17:22.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.01 [info     ] FQE_20220420172227: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016805823420135068, 'time_algorithm_update': 0.004991266761027591, 'loss': 0.2029010563931415, 'time_step': 0.0052324415932238945, 'init_value': -10.510014533996582, 'ave_value': -11.015650661787836, 'soft_opc': nan} step=6035




2022-04-20 17:23.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.03 [info     ] FQE_20220420172227: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016678554911009024, 'time_algorithm_update': 0.005053778769264759, 'loss': 0.21500043308650943, 'time_step': 0.005294414976952781, 'init_value': -10.687347412109375, 'ave_value': -11.428876737306227, 'soft_opc': nan} step=6390




2022-04-20 17:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.05 [info     ] FQE_20220420172227: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00016817710769008584, 'time_algorithm_update': 0.005083847045898438, 'loss': 0.23043406842355157, 'time_step': 0.005327982298085387, 'init_value': -10.65092945098877, 'ave_value': -11.798371488519464, 'soft_opc': nan} step=6745




2022-04-20 17:23.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.07 [info     ] FQE_20220420172227: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017134505258479589, 'time_algorithm_update': 0.005141013776752311, 'loss': 0.2432480613373115, 'time_step': 0.005390198801604794, 'init_value': -10.579512596130371, 'ave_value': -11.969884954684412, 'soft_opc': nan} step=7100




2022-04-20 17:23.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.09 [info     ] FQE_20220420172227: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00016818113729987345, 'time_algorithm_update': 0.004564011936456384, 'loss': 0.26416910398803967, 'time_step': 0.004806626682550135, 'init_value': -10.637267112731934, 'ave_value': -12.36331262423404, 'soft_opc': nan} step=7455




2022-04-20 17:23.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.11 [info     ] FQE_20220420172227: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.0001667667442644146, 'time_algorithm_update': 0.005043586542908574, 'loss': 0.2776435137791953, 'time_step': 0.005285923245926978, 'init_value': -10.869072914123535, 'ave_value': -12.963207305285318, 'soft_opc': nan} step=7810




2022-04-20 17:23.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.13 [info     ] FQE_20220420172227: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00017028593681227993, 'time_algorithm_update': 0.005049602078719878, 'loss': 0.3046012660986941, 'time_step': 0.0052957662394349, 'init_value': -11.119572639465332, 'ave_value': -13.484766719948759, 'soft_opc': nan} step=8165




2022-04-20 17:23.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.15 [info     ] FQE_20220420172227: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016920868779571964, 'time_algorithm_update': 0.005019928703845386, 'loss': 0.3175416755267012, 'time_step': 0.005265237244082168, 'init_value': -11.0895414352417, 'ave_value': -13.893880892028863, 'soft_opc': nan} step=8520




2022-04-20 17:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.17 [info     ] FQE_20220420172227: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016866200406786422, 'time_algorithm_update': 0.004589266172597106, 'loss': 0.32874561945422437, 'time_step': 0.004831431617199535, 'init_value': -11.099093437194824, 'ave_value': -14.086347937650807, 'soft_opc': nan} step=8875




2022-04-20 17:23.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.19 [info     ] FQE_20220420172227: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016928525038168463, 'time_algorithm_update': 0.005078719367443676, 'loss': 0.3400101121357629, 'time_step': 0.005320290444602429, 'init_value': -11.29641056060791, 'ave_value': -14.508009210924595, 'soft_opc': nan} step=9230




2022-04-20 17:23.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.21 [info     ] FQE_20220420172227: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00016908779950209067, 'time_algorithm_update': 0.004960761271731954, 'loss': 0.3530863676797336, 'time_step': 0.005204709147063779, 'init_value': -11.3726806640625, 'ave_value': -14.774923143025003, 'soft_opc': nan} step=9585




2022-04-20 17:23.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.23 [info     ] FQE_20220420172227: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017150153576488226, 'time_algorithm_update': 0.005143197825257208, 'loss': 0.3654277610474489, 'time_step': 0.005391498350761306, 'init_value': -11.573732376098633, 'ave_value': -15.095122493145702, 'soft_opc': nan} step=9940




2022-04-20 17:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.25 [info     ] FQE_20220420172227: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016807099463234484, 'time_algorithm_update': 0.005037086782321124, 'loss': 0.37568973845894066, 'time_step': 0.005280326117931957, 'init_value': -11.719968795776367, 'ave_value': -15.329104019246728, 'soft_opc': nan} step=10295




2022-04-20 17:23.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.27 [info     ] FQE_20220420172227: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00016857066624601124, 'time_algorithm_update': 0.004696372529150734, 'loss': 0.38971975231674355, 'time_step': 0.00493715245958785, 'init_value': -11.982759475708008, 'ave_value': -15.634192976405416, 'soft_opc': nan} step=10650




2022-04-20 17:23.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.29 [info     ] FQE_20220420172227: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017113215486768267, 'time_algorithm_update': 0.005092957322026642, 'loss': 0.4000491769380972, 'time_step': 0.005340024115334094, 'init_value': -12.105294227600098, 'ave_value': -15.815126277538953, 'soft_opc': nan} step=11005




2022-04-20 17:23.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.31 [info     ] FQE_20220420172227: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017189039310938875, 'time_algorithm_update': 0.005142288476648465, 'loss': 0.41226725026860206, 'time_step': 0.0053861013600524045, 'init_value': -12.398706436157227, 'ave_value': -16.068667169153557, 'soft_opc': nan} step=11360




2022-04-20 17:23.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.33 [info     ] FQE_20220420172227: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017260363404179963, 'time_algorithm_update': 0.005137880755142427, 'loss': 0.42599330996753465, 'time_step': 0.005387406953623597, 'init_value': -13.044511795043945, 'ave_value': -16.478408228821255, 'soft_opc': nan} step=11715




2022-04-20 17:23.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.35 [info     ] FQE_20220420172227: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00016692322744450098, 'time_algorithm_update': 0.004572780367354272, 'loss': 0.4360525874342297, 'time_step': 0.004815584505108041, 'init_value': -13.436556816101074, 'ave_value': -16.841023695070483, 'soft_opc': nan} step=12070




2022-04-20 17:23.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.37 [info     ] FQE_20220420172227: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016839470661861796, 'time_algorithm_update': 0.005080628730881382, 'loss': 0.46042147065237377, 'time_step': 0.005324670630441585, 'init_value': -14.340057373046875, 'ave_value': -17.559049248841603, 'soft_opc': nan} step=12425




2022-04-20 17:23.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.39 [info     ] FQE_20220420172227: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00016838328939088634, 'time_algorithm_update': 0.004971034090283891, 'loss': 0.46766190685751574, 'time_step': 0.005213729428573393, 'init_value': -14.66344928741455, 'ave_value': -17.823615204951363, 'soft_opc': nan} step=12780




2022-04-20 17:23.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.41 [info     ] FQE_20220420172227: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016666197440993617, 'time_algorithm_update': 0.004994432019515776, 'loss': 0.48993329534858043, 'time_step': 0.005236914460088166, 'init_value': -15.420930862426758, 'ave_value': -18.40205096812221, 'soft_opc': nan} step=13135




2022-04-20 17:23.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.43 [info     ] FQE_20220420172227: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001697392530844245, 'time_algorithm_update': 0.004825085653385646, 'loss': 0.5018978395512407, 'time_step': 0.005070902596057301, 'init_value': -15.434141159057617, 'ave_value': -18.184898794632755, 'soft_opc': nan} step=13490




2022-04-20 17:23.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.45 [info     ] FQE_20220420172227: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017313352772887325, 'time_algorithm_update': 0.005065417625534702, 'loss': 0.5002840517849569, 'time_step': 0.0053147483879411725, 'init_value': -16.185155868530273, 'ave_value': -18.628282800063182, 'soft_opc': nan} step=13845




2022-04-20 17:23.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.47 [info     ] FQE_20220420172227: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016945718039929026, 'time_algorithm_update': 0.005068365285094355, 'loss': 0.5137386108997842, 'time_step': 0.005313994850910885, 'init_value': -16.068262100219727, 'ave_value': -18.285205955086862, 'soft_opc': nan} step=14200




2022-04-20 17:23.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.49 [info     ] FQE_20220420172227: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.0001713853486826722, 'time_algorithm_update': 0.005067652715763576, 'loss': 0.514226769372611, 'time_step': 0.005312974016431352, 'init_value': -16.669769287109375, 'ave_value': -18.486153122576066, 'soft_opc': nan} step=14555




2022-04-20 17:23.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.51 [info     ] FQE_20220420172227: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016866200406786422, 'time_algorithm_update': 0.005013078367206413, 'loss': 0.5255070566868698, 'time_step': 0.005259288868433992, 'init_value': -16.596614837646484, 'ave_value': -18.167429823985508, 'soft_opc': nan} step=14910




2022-04-20 17:23.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.52 [info     ] FQE_20220420172227: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.0001697204482387489, 'time_algorithm_update': 0.004662820654855648, 'loss': 0.5370316871769831, 'time_step': 0.004906988143920899, 'init_value': -17.320112228393555, 'ave_value': -18.677548184964035, 'soft_opc': nan} step=15265




2022-04-20 17:23.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.55 [info     ] FQE_20220420172227: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017244177804866308, 'time_algorithm_update': 0.0051076015955965285, 'loss': 0.5561697894838494, 'time_step': 0.005355568335089885, 'init_value': -18.07911491394043, 'ave_value': -19.03745363278547, 'soft_opc': nan} step=15620




2022-04-20 17:23.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.57 [info     ] FQE_20220420172227: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017159287358673526, 'time_algorithm_update': 0.005075852300079776, 'loss': 0.5649416460065355, 'time_step': 0.005323038638477594, 'init_value': -18.452425003051758, 'ave_value': -19.303106965289537, 'soft_opc': nan} step=15975




2022-04-20 17:23.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:23.59 [info     ] FQE_20220420172227: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017041958553690306, 'time_algorithm_update': 0.0051089589024933295, 'loss': 0.5539820780269277, 'time_step': 0.005356196282615125, 'init_value': -19.178205490112305, 'ave_value': -19.791646927618633, 'soft_opc': nan} step=16330




2022-04-20 17:23.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:24.00 [info     ] FQE_20220420172227: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016756997981541593, 'time_algorithm_update': 0.004584309752558319, 'loss': 0.5848732051767513, 'time_step': 0.004826886217359086, 'init_value': -19.160797119140625, 'ave_value': -19.700805036524645, 'soft_opc': nan} step=16685




2022-04-20 17:24.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:24.02 [info     ] FQE_20220420172227: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00017027720599107338, 'time_algorithm_update': 0.005090113089118205, 'loss': 0.5978822405594335, 'time_step': 0.005333045502783547, 'init_value': -20.042835235595703, 'ave_value': -20.4479516442122, 'soft_opc': nan} step=17040




2022-04-20 17:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:24.05 [info     ] FQE_20220420172227: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.0001733141885676854, 'time_algorithm_update': 0.0051006511903144945, 'loss': 0.6151393168776388, 'time_step': 0.0053494755650909855, 'init_value': -20.272077560424805, 'ave_value': -20.60656903011717, 'soft_opc': nan} step=17395




2022-04-20 17:24.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:24.07 [info     ] FQE_20220420172227: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016822009019448723, 'time_algorithm_update': 0.005045420686963578, 'loss': 0.6182690216359538, 'time_step': 0.005290331639034647, 'init_value': -20.423734664916992, 'ave_value': -20.615222873406886, 'soft_opc': nan} step=17750




2022-04-20 17:24.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172227/model_17750.pt
search iteration:  23
using hyper params:  [0.003086718887050156, 0.005364417074933797, 6.0409768478269284e-05, 5]
2022-04-20 17:24.07 [debug    ] RoundIterator is selected.
2022-04-20 17:24.07 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420172407
2022-04-20 17:24.07 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:24.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:24.07 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:24.07 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0030867188870

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.10 [info     ] TD3PlusBC_20220420172407: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003658595838044819, 'time_algorithm_update': 0.008306570917542219, 'critic_loss': 6.250624298003682, 'actor_loss': 2.6412388832248443, 'time_step': 0.00874969276071292, 'td_error': 0.8999619783120923, 'init_value': -8.118374824523926, 'ave_value': -5.0560765298547405} step=342
2022-04-20 17:24.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.14 [info     ] TD3PlusBC_20220420172407: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003693173503318028, 'time_algorithm_update': 0.008840760292365538, 'critic_loss': 3.1904242828226925, 'actor_loss': 2.5390159107788266, 'time_step': 0.00928479816481384, 'td_error': 1.0488056330816637, 'init_value': -11.470375061035156, 'ave_value': -7.143938960380147} step=684
2022-04-20 17:24.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.18 [info     ] TD3PlusBC_20220420172407: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00036756755315769485, 'time_algorithm_update': 0.008789427099172135, 'critic_loss': 5.02087733114672, 'actor_loss': 2.5279601303457517, 'time_step': 0.009231074511656287, 'td_error': 1.246918814121551, 'init_value': -14.931289672851562, 'ave_value': -9.364016537588165} step=1026
2022-04-20 17:24.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.21 [info     ] TD3PlusBC_20220420172407: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003704641297546744, 'time_algorithm_update': 0.008432316501238192, 'critic_loss': 7.202798902639869, 'actor_loss': 2.5229128104204324, 'time_step': 0.008878465284380996, 'td_error': 1.5102165565972865, 'init_value': -18.63726806640625, 'ave_value': -11.717512873421919} step=1368
2022-04-20 17:24.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.25 [info     ] TD3PlusBC_20220420172407: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003742920724969161, 'time_algorithm_update': 0.008965846390752067, 'critic_loss': 9.774935123516105, 'actor_loss': 2.5223433943519815, 'time_step': 0.009417690031709726, 'td_error': 1.8152559137827065, 'init_value': -21.947154998779297, 'ave_value': -13.776173139257768} step=1710
2022-04-20 17:24.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.29 [info     ] TD3PlusBC_20220420172407: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003691193653128998, 'time_algorithm_update': 0.008481470464962965, 'critic_loss': 12.711382485272592, 'actor_loss': 2.5197447712658443, 'time_step': 0.008921665754931711, 'td_error': 2.1534159183456114, 'init_value': -25.474720001220703, 'ave_value': -16.1855984101056} step=2052
2022-04-20 17:24.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.32 [info     ] TD3PlusBC_20220420172407: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003707457704153674, 'time_algorithm_update': 0.008966923457140114, 'critic_loss': 15.939078717203865, 'actor_loss': 2.5201775609401236, 'time_step': 0.009409500841508833, 'td_error': 2.467967683299228, 'init_value': -29.0863037109375, 'ave_value': -18.45216062155611} step=2394
2022-04-20 17:24.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.36 [info     ] TD3PlusBC_20220420172407: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003700521257188585, 'time_algorithm_update': 0.00883031309696666, 'critic_loss': 19.31626445786995, 'actor_loss': 2.519009848087155, 'time_step': 0.00927291348663687, 'td_error': 2.959745327651496, 'init_value': -32.27292251586914, 'ave_value': -20.37125985919498} step=2736
2022-04-20 17:24.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.39 [info     ] TD3PlusBC_20220420172407: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003743276261446769, 'time_algorithm_update': 0.008364579133820115, 'critic_loss': 22.98434577908432, 'actor_loss': 2.518682919050518, 'time_step': 0.008812129148962901, 'td_error': 3.2193696038057777, 'init_value': -35.589988708496094, 'ave_value': -22.51271597688531} step=3078
2022-04-20 17:24.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.43 [info     ] TD3PlusBC_20220420172407: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003689241688153897, 'time_algorithm_update': 0.008882017163505332, 'critic_loss': 26.94179119422422, 'actor_loss': 2.519174433591073, 'time_step': 0.009325921186926768, 'td_error': 3.5658164250371644, 'init_value': -39.124183654785156, 'ave_value': -24.554958827877584} step=3420
2022-04-20 17:24.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.47 [info     ] TD3PlusBC_20220420172407: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003712965033904851, 'time_algorithm_update': 0.008032971655416209, 'critic_loss': 30.921884355489272, 'actor_loss': 2.5175052665130435, 'time_step': 0.008476827576843619, 'td_error': 3.9250153336157423, 'init_value': -41.99895477294922, 'ave_value': -26.477780707699942} step=3762
2022-04-20 17:24.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.49 [info     ] TD3PlusBC_20220420172407: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00037403831705015304, 'time_algorithm_update': 0.006637288813005414, 'critic_loss': 35.12801044307954, 'actor_loss': 2.5191127431322955, 'time_step': 0.007083578416478564, 'td_error': 4.354325078349458, 'init_value': -44.92425537109375, 'ave_value': -28.342330526442996} step=4104
2022-04-20 17:24.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.52 [info     ] TD3PlusBC_20220420172407: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003677292873984889, 'time_algorithm_update': 0.006739550166659885, 'critic_loss': 39.75112152657314, 'actor_loss': 2.5188483773616324, 'time_step': 0.0071764640640794184, 'td_error': 4.698189895032226, 'init_value': -47.13192367553711, 'ave_value': -30.036891869474957} step=4446
2022-04-20 17:24.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.55 [info     ] TD3PlusBC_20220420172407: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003710051029049165, 'time_algorithm_update': 0.006722481627213328, 'critic_loss': 43.546680355629725, 'actor_loss': 2.518072340223524, 'time_step': 0.007164620516592996, 'td_error': 4.887636309105123, 'init_value': -48.72272491455078, 'ave_value': -31.15512474188128} step=4788
2022-04-20 17:24.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.58 [info     ] TD3PlusBC_20220420172407: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003705742763496979, 'time_algorithm_update': 0.006747025495384171, 'critic_loss': 48.08611191643609, 'actor_loss': 2.519104652237474, 'time_step': 0.007191561816031472, 'td_error': 5.491188030968537, 'init_value': -53.109275817871094, 'ave_value': -33.535627720810325} step=5130
2022-04-20 17:24.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.01 [info     ] TD3PlusBC_20220420172407: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003711243121944673, 'time_algorithm_update': 0.006691551347922164, 'critic_loss': 51.955898552610165, 'actor_loss': 2.5184416422369886, 'time_step': 0.007136672560931646, 'td_error': 5.864810526452389, 'init_value': -53.692115783691406, 'ave_value': -34.72410146621618} step=5472
2022-04-20 17:25.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.04 [info     ] TD3PlusBC_20220420172407: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00036653510311193633, 'time_algorithm_update': 0.0066875484254625105, 'critic_loss': 56.631000078212445, 'actor_loss': 2.519128227791591, 'time_step': 0.007129995446456106, 'td_error': 6.438312041512271, 'init_value': -56.7078742980957, 'ave_value': -36.23141358496722} step=5814
2022-04-20 17:25.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.06 [info     ] TD3PlusBC_20220420172407: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003691047255755865, 'time_algorithm_update': 0.006702655240109092, 'critic_loss': 61.00857787662082, 'actor_loss': 2.519040728173061, 'time_step': 0.00714555957861114, 'td_error': 6.919945732490011, 'init_value': -59.334869384765625, 'ave_value': -37.927053103791046} step=6156
2022-04-20 17:25.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.09 [info     ] TD3PlusBC_20220420172407: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003671959826820775, 'time_algorithm_update': 0.006681250549896419, 'critic_loss': 65.33574769092583, 'actor_loss': 2.520240403058236, 'time_step': 0.007125436911108898, 'td_error': 6.89322815979885, 'init_value': -59.773475646972656, 'ave_value': -39.102444728586725} step=6498
2022-04-20 17:25.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.12 [info     ] TD3PlusBC_20220420172407: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003753621675814802, 'time_algorithm_update': 0.006668119402656778, 'critic_loss': 69.76761676275243, 'actor_loss': 2.5207622232492906, 'time_step': 0.007119555919491059, 'td_error': 7.615859894450557, 'init_value': -61.90105438232422, 'ave_value': -40.44907121270039} step=6840
2022-04-20 17:25.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.15 [info     ] TD3PlusBC_20220420172407: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003674650749965021, 'time_algorithm_update': 0.006685924808881436, 'critic_loss': 73.77740267703408, 'actor_loss': 2.520568539524636, 'time_step': 0.007129105211001391, 'td_error': 7.944424191344745, 'init_value': -62.72239303588867, 'ave_value': -41.300099831818194} step=7182
2022-04-20 17:25.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.18 [info     ] TD3PlusBC_20220420172407: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037430880362527414, 'time_algorithm_update': 0.00669102292311819, 'critic_loss': 77.86393494633903, 'actor_loss': 2.5213695659972073, 'time_step': 0.007139658370213202, 'td_error': 8.428158006225408, 'init_value': -65.49810791015625, 'ave_value': -42.92187154692036} step=7524
2022-04-20 17:25.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.21 [info     ] TD3PlusBC_20220420172407: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.000370031211808411, 'time_algorithm_update': 0.006674546247337296, 'critic_loss': 81.20329973990457, 'actor_loss': 2.5235277811686196, 'time_step': 0.007119151583889075, 'td_error': 8.36624707562207, 'init_value': -65.97764587402344, 'ave_value': -43.698221444735424} step=7866
2022-04-20 17:25.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.24 [info     ] TD3PlusBC_20220420172407: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.000368275837591517, 'time_algorithm_update': 0.006718885131746705, 'critic_loss': 85.2673151228163, 'actor_loss': 2.5220889905739945, 'time_step': 0.007158291967291581, 'td_error': 9.381574296220146, 'init_value': -68.88282775878906, 'ave_value': -45.31740704385346} step=8208
2022-04-20 17:25.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.26 [info     ] TD3PlusBC_20220420172407: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003712400358322768, 'time_algorithm_update': 0.006722473261649148, 'critic_loss': 88.78445488109924, 'actor_loss': 2.521425733789366, 'time_step': 0.007167415312159131, 'td_error': 9.079741363664937, 'init_value': -68.51075744628906, 'ave_value': -46.07244826651321} step=8550
2022-04-20 17:25.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.29 [info     ] TD3PlusBC_20220420172407: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00036732146614476254, 'time_algorithm_update': 0.006387736365111947, 'critic_loss': 92.43180133306493, 'actor_loss': 2.5215239789750843, 'time_step': 0.006830655343351308, 'td_error': 9.500050494672989, 'init_value': -69.56451416015625, 'ave_value': -46.983044716164066} step=8892
2022-04-20 17:25.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.32 [info     ] TD3PlusBC_20220420172407: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00036744625247709935, 'time_algorithm_update': 0.0067744387520684134, 'critic_loss': 95.65689439383166, 'actor_loss': 2.5227798802113672, 'time_step': 0.007218653695625171, 'td_error': 9.918672616090646, 'init_value': -70.94572448730469, 'ave_value': -48.18819954146874} step=9234
2022-04-20 17:25.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.35 [info     ] TD3PlusBC_20220420172407: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003656525360910516, 'time_algorithm_update': 0.006755071076733327, 'critic_loss': 98.7497229994389, 'actor_loss': 2.5238797413675407, 'time_step': 0.007194822991800587, 'td_error': 10.54067013368254, 'init_value': -73.12467956542969, 'ave_value': -49.195448522455905} step=9576
2022-04-20 17:25.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.38 [info     ] TD3PlusBC_20220420172407: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003711619572332728, 'time_algorithm_update': 0.006730840917219196, 'critic_loss': 101.89958624811898, 'actor_loss': 2.523600705185829, 'time_step': 0.007165413850929305, 'td_error': 10.957355910535387, 'init_value': -74.3215560913086, 'ave_value': -50.07466562150902} step=9918
2022-04-20 17:25.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.41 [info     ] TD3PlusBC_20220420172407: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003682932658502233, 'time_algorithm_update': 0.006796539178368641, 'critic_loss': 104.88634466427808, 'actor_loss': 2.524003735759802, 'time_step': 0.007225806252998218, 'td_error': 10.96232610305875, 'init_value': -72.92420959472656, 'ave_value': -50.25547414012119} step=10260
2022-04-20 17:25.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.44 [info     ] TD3PlusBC_20220420172407: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003645761668333533, 'time_algorithm_update': 0.006830727844907527, 'critic_loss': 107.39839646411919, 'actor_loss': 2.5248258992245325, 'time_step': 0.007253378455401861, 'td_error': 11.132192909488683, 'init_value': -74.67569732666016, 'ave_value': -51.390155449871074} step=10602
2022-04-20 17:25.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.47 [info     ] TD3PlusBC_20220420172407: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00036973702280144945, 'time_algorithm_update': 0.0068383328398765875, 'critic_loss': 110.44870026348627, 'actor_loss': 2.5232190569938973, 'time_step': 0.007267747009009646, 'td_error': 11.576034825126595, 'init_value': -73.66495513916016, 'ave_value': -52.14847526993007} step=10944
2022-04-20 17:25.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.49 [info     ] TD3PlusBC_20220420172407: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003670307627895422, 'time_algorithm_update': 0.006725585251523738, 'critic_loss': 113.64564122651753, 'actor_loss': 2.525086165868748, 'time_step': 0.007156351853532401, 'td_error': 11.84807735854043, 'init_value': -76.3465805053711, 'ave_value': -53.14436462552312} step=11286
2022-04-20 17:25.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.52 [info     ] TD3PlusBC_20220420172407: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003694058858860306, 'time_algorithm_update': 0.006842338550857633, 'critic_loss': 115.74468144199304, 'actor_loss': 2.5258001040296945, 'time_step': 0.007275144259134929, 'td_error': 12.16846483085857, 'init_value': -77.43458557128906, 'ave_value': -53.66903052276806} step=11628
2022-04-20 17:25.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.55 [info     ] TD3PlusBC_20220420172407: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037037489707009835, 'time_algorithm_update': 0.006747300164741382, 'critic_loss': 118.42880521183126, 'actor_loss': 2.525028609392936, 'time_step': 0.007175237811796847, 'td_error': 12.28797687338405, 'init_value': -77.00605773925781, 'ave_value': -54.146745151192754} step=11970
2022-04-20 17:25.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:25.58 [info     ] TD3PlusBC_20220420172407: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036857839216265765, 'time_algorithm_update': 0.006770426766914234, 'critic_loss': 120.68767538684153, 'actor_loss': 2.5267842527021442, 'time_step': 0.007204439904954698, 'td_error': 12.282441417617232, 'init_value': -75.55563354492188, 'ave_value': -54.74746454364444} step=12312
2022-04-20 17:25.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.01 [info     ] TD3PlusBC_20220420172407: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.000366166321157712, 'time_algorithm_update': 0.0067779411349380226, 'critic_loss': 123.16862017090557, 'actor_loss': 2.5272614886189064, 'time_step': 0.007207609756648192, 'td_error': 12.362095905713252, 'init_value': -76.09288787841797, 'ave_value': -55.09227489140942} step=12654
2022-04-20 17:26.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.04 [info     ] TD3PlusBC_20220420172407: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003658219387656764, 'time_algorithm_update': 0.006800199112696954, 'critic_loss': 125.51033269871049, 'actor_loss': 2.527082759734483, 'time_step': 0.00722552252094648, 'td_error': 12.751289477642937, 'init_value': -77.10458374023438, 'ave_value': -55.917294229763634} step=12996
2022-04-20 17:26.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.07 [info     ] TD3PlusBC_20220420172407: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003656999409547326, 'time_algorithm_update': 0.0067221644329048735, 'critic_loss': 127.76200322379843, 'actor_loss': 2.528495264332197, 'time_step': 0.007152837619446872, 'td_error': 13.487548217656895, 'init_value': -78.54148864746094, 'ave_value': -56.87142528736192} step=13338
2022-04-20 17:26.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.09 [info     ] TD3PlusBC_20220420172407: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00036097688284533763, 'time_algorithm_update': 0.006745185071264791, 'critic_loss': 129.71403300413613, 'actor_loss': 2.5271042327434694, 'time_step': 0.007170662545321281, 'td_error': 13.190855912229388, 'init_value': -78.2584457397461, 'ave_value': -57.125813551389086} step=13680
2022-04-20 17:26.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.12 [info     ] TD3PlusBC_20220420172407: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00036657971945422435, 'time_algorithm_update': 0.006705067311114038, 'critic_loss': 131.55866788005272, 'actor_loss': 2.528756502776118, 'time_step': 0.007138397261413217, 'td_error': 13.503043507122895, 'init_value': -77.4996109008789, 'ave_value': -57.418404285249274} step=14022
2022-04-20 17:26.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.15 [info     ] TD3PlusBC_20220420172407: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003657682597288611, 'time_algorithm_update': 0.00682061108929372, 'critic_loss': 133.65608503107438, 'actor_loss': 2.528836576562179, 'time_step': 0.007247840451915362, 'td_error': 13.708894971083332, 'init_value': -79.00059509277344, 'ave_value': -58.009669692605094} step=14364
2022-04-20 17:26.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.18 [info     ] TD3PlusBC_20220420172407: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00036441791824430055, 'time_algorithm_update': 0.00674010577954744, 'critic_loss': 135.38618259541474, 'actor_loss': 2.5287599703024704, 'time_step': 0.007167348387645699, 'td_error': 13.241793740853877, 'init_value': -77.23316192626953, 'ave_value': -57.95673252219586} step=14706
2022-04-20 17:26.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.21 [info     ] TD3PlusBC_20220420172407: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003671855257268538, 'time_algorithm_update': 0.006664255906266776, 'critic_loss': 137.0930795948408, 'actor_loss': 2.52969377361543, 'time_step': 0.0070963937636704475, 'td_error': 14.123895981366802, 'init_value': -78.45763397216797, 'ave_value': -58.97780267311646} step=15048
2022-04-20 17:26.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.24 [info     ] TD3PlusBC_20220420172407: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037281764181036696, 'time_algorithm_update': 0.006644485289590401, 'critic_loss': 138.89080641004773, 'actor_loss': 2.5288654642495496, 'time_step': 0.007080545202333328, 'td_error': 13.707734515442366, 'init_value': -80.55091857910156, 'ave_value': -59.50717148704706} step=15390
2022-04-20 17:26.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.27 [info     ] TD3PlusBC_20220420172407: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003638455742283871, 'time_algorithm_update': 0.0067449180703414114, 'critic_loss': 140.5250593709667, 'actor_loss': 2.5296419099060414, 'time_step': 0.007173119929798862, 'td_error': 14.360586119465893, 'init_value': -77.9207992553711, 'ave_value': -59.617806502053064} step=15732
2022-04-20 17:26.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.29 [info     ] TD3PlusBC_20220420172407: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003697879133168717, 'time_algorithm_update': 0.006628316048293086, 'critic_loss': 142.08510509290193, 'actor_loss': 2.5306215091058384, 'time_step': 0.007061930427774352, 'td_error': 14.655232653703424, 'init_value': -79.14509582519531, 'ave_value': -60.21440394368275} step=16074
2022-04-20 17:26.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.32 [info     ] TD3PlusBC_20220420172407: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003616921385826423, 'time_algorithm_update': 0.006745769963626973, 'critic_loss': 143.20493082414595, 'actor_loss': 2.5313160293980648, 'time_step': 0.007166793471888492, 'td_error': 14.76358637489181, 'init_value': -79.66455078125, 'ave_value': -60.596170928619536} step=16416
2022-04-20 17:26.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.35 [info     ] TD3PlusBC_20220420172407: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00036346703244928726, 'time_algorithm_update': 0.006797572325544747, 'critic_loss': 144.39700125532542, 'actor_loss': 2.5315264233371666, 'time_step': 0.007224149871290776, 'td_error': 14.88860940855941, 'init_value': -77.59718322753906, 'ave_value': -60.504285569895764} step=16758
2022-04-20 17:26.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.38 [info     ] TD3PlusBC_20220420172407: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003686348597208659, 'time_algorithm_update': 0.006736833449692754, 'critic_loss': 145.76326156080816, 'actor_loss': 2.532278074855693, 'time_step': 0.007167745054813854, 'td_error': 15.206216675670241, 'init_value': -80.19593811035156, 'ave_value': -61.37586585999408} step=17100
2022-04-20 17:26.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172407/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:26.39 [info     ] FQE_20220420172638: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015277647029208598, 'time_algorithm_update': 0.003407243954933296, 'loss': 0.008033321271939887, 'time_step': 0.003631435545150843, 'init_value': -0.5569262504577637, 'ave_value': -0.4820586718864985, 'soft_opc': nan} step=177




2022-04-20 17:26.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.40 [info     ] FQE_20220420172638: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015655614561953788, 'time_algorithm_update': 0.003501361372780665, 'loss': 0.006106435114749323, 'time_step': 0.003726441981428761, 'init_value': -0.7040796875953674, 'ave_value': -0.56230723519583, 'soft_opc': nan} step=354




2022-04-20 17:26.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.40 [info     ] FQE_20220420172638: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015594595569675253, 'time_algorithm_update': 0.003501202427061264, 'loss': 0.005518884591410978, 'time_step': 0.0037295090950141517, 'init_value': -0.7589719295501709, 'ave_value': -0.5788541438969764, 'soft_opc': nan} step=531




2022-04-20 17:26.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.41 [info     ] FQE_20220420172638: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015499497537559034, 'time_algorithm_update': 0.0034482708085054733, 'loss': 0.00540038944474595, 'time_step': 0.0036710211112674345, 'init_value': -0.8057315349578857, 'ave_value': -0.6105317441461322, 'soft_opc': nan} step=708




2022-04-20 17:26.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.42 [info     ] FQE_20220420172638: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00015753810688600703, 'time_algorithm_update': 0.0035503301243324063, 'loss': 0.005225546052942505, 'time_step': 0.0037778393696930447, 'init_value': -0.8385007381439209, 'ave_value': -0.6338949284843496, 'soft_opc': nan} step=885




2022-04-20 17:26.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.43 [info     ] FQE_20220420172638: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00015748961497161348, 'time_algorithm_update': 0.003475509794418421, 'loss': 0.0050539405369278735, 'time_step': 0.003700695468881036, 'init_value': -0.8664038777351379, 'ave_value': -0.6633916613933918, 'soft_opc': nan} step=1062




2022-04-20 17:26.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.43 [info     ] FQE_20220420172638: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00015512832814017257, 'time_algorithm_update': 0.0034342445222671422, 'loss': 0.004914271814938066, 'time_step': 0.0036610196539237673, 'init_value': -0.8396084904670715, 'ave_value': -0.6462443312188169, 'soft_opc': nan} step=1239




2022-04-20 17:26.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.44 [info     ] FQE_20220420172638: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015778595444846289, 'time_algorithm_update': 0.0034308514352572165, 'loss': 0.004712938131122602, 'time_step': 0.003662494616319904, 'init_value': -0.8374332785606384, 'ave_value': -0.6387265643200001, 'soft_opc': nan} step=1416




2022-04-20 17:26.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.45 [info     ] FQE_20220420172638: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00015773476853882525, 'time_algorithm_update': 0.00342994355885996, 'loss': 0.004672185416559631, 'time_step': 0.003654478633471128, 'init_value': -0.8338083624839783, 'ave_value': -0.6509908781842785, 'soft_opc': nan} step=1593




2022-04-20 17:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.45 [info     ] FQE_20220420172638: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00015985898378878663, 'time_algorithm_update': 0.003410152122799286, 'loss': 0.004651554957516281, 'time_step': 0.0036430574406338275, 'init_value': -0.8676543235778809, 'ave_value': -0.6795002086563511, 'soft_opc': nan} step=1770




2022-04-20 17:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.46 [info     ] FQE_20220420172638: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015541119764080156, 'time_algorithm_update': 0.0035666382245424775, 'loss': 0.004928336562957131, 'time_step': 0.003788965570051118, 'init_value': -0.9184873104095459, 'ave_value': -0.7150776862740159, 'soft_opc': nan} step=1947




2022-04-20 17:26.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.47 [info     ] FQE_20220420172638: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016096621583410576, 'time_algorithm_update': 0.003401112421757757, 'loss': 0.005053439332135442, 'time_step': 0.003632032265097408, 'init_value': -0.9766963124275208, 'ave_value': -0.7614436466414649, 'soft_opc': nan} step=2124




2022-04-20 17:26.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.48 [info     ] FQE_20220420172638: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015682285114870234, 'time_algorithm_update': 0.003531499097576249, 'loss': 0.005290022359287503, 'time_step': 0.003760382280511371, 'init_value': -0.9582862854003906, 'ave_value': -0.743494898962366, 'soft_opc': nan} step=2301




2022-04-20 17:26.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.48 [info     ] FQE_20220420172638: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00015968522109554312, 'time_algorithm_update': 0.004061949455131919, 'loss': 0.006007613570134729, 'time_step': 0.004293829707776086, 'init_value': -1.0311533212661743, 'ave_value': -0.7901637276550671, 'soft_opc': nan} step=2478




2022-04-20 17:26.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.49 [info     ] FQE_20220420172638: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016113728453210518, 'time_algorithm_update': 0.005135699180559923, 'loss': 0.0064662111243022615, 'time_step': 0.005370044438852428, 'init_value': -1.0235363245010376, 'ave_value': -0.7802555612526141, 'soft_opc': nan} step=2655




2022-04-20 17:26.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.50 [info     ] FQE_20220420172638: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016334366663701118, 'time_algorithm_update': 0.00504467177525752, 'loss': 0.007005023643095572, 'time_step': 0.005280896095232775, 'init_value': -1.0446025133132935, 'ave_value': -0.8033553793243281, 'soft_opc': nan} step=2832




2022-04-20 17:26.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.51 [info     ] FQE_20220420172638: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00016791807056146826, 'time_algorithm_update': 0.0051345932955122265, 'loss': 0.00778751830783499, 'time_step': 0.005377238753151759, 'init_value': -1.1929889917373657, 'ave_value': -0.9211682576137978, 'soft_opc': nan} step=3009




2022-04-20 17:26.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.52 [info     ] FQE_20220420172638: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016499373872401352, 'time_algorithm_update': 0.004972857944035934, 'loss': 0.008629170289831394, 'time_step': 0.005209036466092039, 'init_value': -1.1342158317565918, 'ave_value': -0.8808080465362237, 'soft_opc': nan} step=3186




2022-04-20 17:26.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.53 [info     ] FQE_20220420172638: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001649021428857146, 'time_algorithm_update': 0.005082725805077849, 'loss': 0.009583223237535037, 'time_step': 0.005320056010100801, 'init_value': -1.247676968574524, 'ave_value': -0.9881218028766615, 'soft_opc': nan} step=3363




2022-04-20 17:26.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.54 [info     ] FQE_20220420172638: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00017573604475980425, 'time_algorithm_update': 0.005109035362631588, 'loss': 0.009895415352473277, 'time_step': 0.005359603860283975, 'init_value': -1.2552136182785034, 'ave_value': -0.9841538246307108, 'soft_opc': nan} step=3540




2022-04-20 17:26.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.55 [info     ] FQE_20220420172638: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.0001680164013878774, 'time_algorithm_update': 0.005015184650313383, 'loss': 0.009819774991059202, 'time_step': 0.005258676022459558, 'init_value': -1.2803958654403687, 'ave_value': -1.0176396344561836, 'soft_opc': nan} step=3717




2022-04-20 17:26.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.56 [info     ] FQE_20220420172638: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016273886470471397, 'time_algorithm_update': 0.0047109692783679, 'loss': 0.011438013647688226, 'time_step': 0.004947205721321753, 'init_value': -1.3207160234451294, 'ave_value': -1.0470445899365544, 'soft_opc': nan} step=3894




2022-04-20 17:26.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.57 [info     ] FQE_20220420172638: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001612383093537584, 'time_algorithm_update': 0.0046317011623059285, 'loss': 0.011908295826123511, 'time_step': 0.004864937841555493, 'init_value': -1.3185062408447266, 'ave_value': -1.0474557768978603, 'soft_opc': nan} step=4071




2022-04-20 17:26.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.58 [info     ] FQE_20220420172638: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016257991898531294, 'time_algorithm_update': 0.005033005428853008, 'loss': 0.012605842729072786, 'time_step': 0.005267851770260913, 'init_value': -1.3942769765853882, 'ave_value': -1.1197414634083664, 'soft_opc': nan} step=4248




2022-04-20 17:26.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:26.59 [info     ] FQE_20220420172638: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016653335700600835, 'time_algorithm_update': 0.0050565482532910705, 'loss': 0.01334790950825196, 'time_step': 0.005294317579538809, 'init_value': -1.4443553686141968, 'ave_value': -1.1188687398638184, 'soft_opc': nan} step=4425




2022-04-20 17:26.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.00 [info     ] FQE_20220420172638: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00016769985694669735, 'time_algorithm_update': 0.005062181397346453, 'loss': 0.014343925606390978, 'time_step': 0.005303768114855061, 'init_value': -1.4980615377426147, 'ave_value': -1.170556883815978, 'soft_opc': nan} step=4602




2022-04-20 17:27.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.01 [info     ] FQE_20220420172638: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001689943216614804, 'time_algorithm_update': 0.005029896558341333, 'loss': 0.014877009656711739, 'time_step': 0.00526926746476168, 'init_value': -1.519608497619629, 'ave_value': -1.1936012036046824, 'soft_opc': nan} step=4779




2022-04-20 17:27.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.02 [info     ] FQE_20220420172638: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016487116194040762, 'time_algorithm_update': 0.00501000679145425, 'loss': 0.015386077186676785, 'time_step': 0.0052473612424344, 'init_value': -1.5671924352645874, 'ave_value': -1.2131573714148085, 'soft_opc': nan} step=4956




2022-04-20 17:27.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.03 [info     ] FQE_20220420172638: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016689031137584966, 'time_algorithm_update': 0.0049702905665683205, 'loss': 0.015972767618490057, 'time_step': 0.0052109478557177185, 'init_value': -1.5446348190307617, 'ave_value': -1.1730392450013676, 'soft_opc': nan} step=5133




2022-04-20 17:27.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.04 [info     ] FQE_20220420172638: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.0001638932416668046, 'time_algorithm_update': 0.005052363131679384, 'loss': 0.01667618042510529, 'time_step': 0.005288169882391806, 'init_value': -1.5517650842666626, 'ave_value': -1.1568426010945627, 'soft_opc': nan} step=5310




2022-04-20 17:27.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.05 [info     ] FQE_20220420172638: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00015935520667814267, 'time_algorithm_update': 0.004214148063444148, 'loss': 0.017663148757975318, 'time_step': 0.0044456336457850566, 'init_value': -1.5337491035461426, 'ave_value': -1.1289771827066446, 'soft_opc': nan} step=5487




2022-04-20 17:27.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.06 [info     ] FQE_20220420172638: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016832351684570312, 'time_algorithm_update': 0.005056324651685811, 'loss': 0.018891269579203535, 'time_step': 0.0052980204760018045, 'init_value': -1.5668506622314453, 'ave_value': -1.1327972571211713, 'soft_opc': nan} step=5664




2022-04-20 17:27.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.07 [info     ] FQE_20220420172638: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001462448788228008, 'time_algorithm_update': 0.004942500658627958, 'loss': 0.01940080394579024, 'time_step': 0.005153172433713061, 'init_value': -1.6239548921585083, 'ave_value': -1.1774023285722948, 'soft_opc': nan} step=5841




2022-04-20 17:27.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.08 [info     ] FQE_20220420172638: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015036938554149564, 'time_algorithm_update': 0.005003915668207373, 'loss': 0.020051254361399613, 'time_step': 0.005220840206254, 'init_value': -1.6780720949172974, 'ave_value': -1.2386397332691395, 'soft_opc': nan} step=6018




2022-04-20 17:27.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.09 [info     ] FQE_20220420172638: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00014837448206325035, 'time_algorithm_update': 0.004928547110261217, 'loss': 0.02064300986930286, 'time_step': 0.005141085824050472, 'init_value': -1.6884816884994507, 'ave_value': -1.1859185234450542, 'soft_opc': nan} step=6195




2022-04-20 17:27.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.10 [info     ] FQE_20220420172638: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00014056189585540254, 'time_algorithm_update': 0.004918157717602401, 'loss': 0.021799975415523358, 'time_step': 0.005120448473483156, 'init_value': -1.7249740362167358, 'ave_value': -1.2016442190419445, 'soft_opc': nan} step=6372




2022-04-20 17:27.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.11 [info     ] FQE_20220420172638: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00014608862709864385, 'time_algorithm_update': 0.004856171581031239, 'loss': 0.02166156289641256, 'time_step': 0.005066809681175792, 'init_value': -1.8296475410461426, 'ave_value': -1.2784056955193346, 'soft_opc': nan} step=6549




2022-04-20 17:27.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.12 [info     ] FQE_20220420172638: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00014647252142092602, 'time_algorithm_update': 0.004998089903492039, 'loss': 0.0229646145796277, 'time_step': 0.005210997694629734, 'init_value': -1.9401134252548218, 'ave_value': -1.3452513833169464, 'soft_opc': nan} step=6726




2022-04-20 17:27.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.13 [info     ] FQE_20220420172638: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016062811943097304, 'time_algorithm_update': 0.004647900155708614, 'loss': 0.022409962605958605, 'time_step': 0.004878774201129116, 'init_value': -1.9478588104248047, 'ave_value': -1.3453602111554361, 'soft_opc': nan} step=6903




2022-04-20 17:27.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.14 [info     ] FQE_20220420172638: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.0001637046619997186, 'time_algorithm_update': 0.004707134376137944, 'loss': 0.022956447628468782, 'time_step': 0.0049446935707566425, 'init_value': -1.9183493852615356, 'ave_value': -1.3206512324142832, 'soft_opc': nan} step=7080




2022-04-20 17:27.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.15 [info     ] FQE_20220420172638: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.000165640297582594, 'time_algorithm_update': 0.004964047232590153, 'loss': 0.025009940278395423, 'time_step': 0.0052039677140402925, 'init_value': -1.9933220148086548, 'ave_value': -1.3953243102833583, 'soft_opc': nan} step=7257




2022-04-20 17:27.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.16 [info     ] FQE_20220420172638: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016662899383717338, 'time_algorithm_update': 0.005058498705847789, 'loss': 0.025693178327496315, 'time_step': 0.005300057136406333, 'init_value': -2.094651699066162, 'ave_value': -1.4233791179574646, 'soft_opc': nan} step=7434




2022-04-20 17:27.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.17 [info     ] FQE_20220420172638: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016388785367631642, 'time_algorithm_update': 0.005121807594083797, 'loss': 0.027165892038699554, 'time_step': 0.005362214341675494, 'init_value': -2.129697561264038, 'ave_value': -1.4596826757273607, 'soft_opc': nan} step=7611




2022-04-20 17:27.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.18 [info     ] FQE_20220420172638: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016905224255922823, 'time_algorithm_update': 0.00515128529004458, 'loss': 0.027648197163401517, 'time_step': 0.005395716866530941, 'init_value': -2.1329586505889893, 'ave_value': -1.4328507080998596, 'soft_opc': nan} step=7788




2022-04-20 17:27.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.19 [info     ] FQE_20220420172638: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.0001668283494852357, 'time_algorithm_update': 0.005039491222403144, 'loss': 0.02805281526837919, 'time_step': 0.005280413870084084, 'init_value': -2.260913610458374, 'ave_value': -1.5310511191831426, 'soft_opc': nan} step=7965




2022-04-20 17:27.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.20 [info     ] FQE_20220420172638: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00017331144904012734, 'time_algorithm_update': 0.005056820346810723, 'loss': 0.029024822334232765, 'time_step': 0.005305016781650694, 'init_value': -2.2525901794433594, 'ave_value': -1.5189090605209898, 'soft_opc': nan} step=8142




2022-04-20 17:27.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.21 [info     ] FQE_20220420172638: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.0001669199453235346, 'time_algorithm_update': 0.005140808342540331, 'loss': 0.029792686519186713, 'time_step': 0.0053829648400430625, 'init_value': -2.3455636501312256, 'ave_value': -1.6495743118651636, 'soft_opc': nan} step=8319




2022-04-20 17:27.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.22 [info     ] FQE_20220420172638: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016564703257070424, 'time_algorithm_update': 0.004216036554110252, 'loss': 0.03063031764553681, 'time_step': 0.004457981572986322, 'init_value': -2.371788740158081, 'ave_value': -1.6239095708047664, 'soft_opc': nan} step=8496




2022-04-20 17:27.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.23 [info     ] FQE_20220420172638: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.0001699089330468474, 'time_algorithm_update': 0.005207992542934956, 'loss': 0.030843440975367727, 'time_step': 0.005454254689189674, 'init_value': -2.370417833328247, 'ave_value': -1.603549543671959, 'soft_opc': nan} step=8673




2022-04-20 17:27.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:27.24 [info     ] FQE_20220420172638: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.0001684110716911359, 'time_algorithm_update': 0.005105842978267347, 'loss': 0.031520572141744196, 'time_step': 0.0053501317730057715, 'init_value': -2.4419286251068115, 'ave_value': -1.6457235388882883, 'soft_opc': nan} step=8850




2022-04-20 17:27.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172638/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:27.24 [info     ] Directory is created at d3rlpy_logs/FQE_20220420172724
2022-04-20 17:27.24 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:27.24 [debug    ] Building models...
2022-04-20 17:27.24 [debug    ] Models have been built.
2022-04-20 17:27.24 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420172724/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:27.26 [info     ] FQE_20220420172724: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016534563521264305, 'time_algorithm_update': 0.005022886437429509, 'loss': 0.026921331601768312, 'time_step': 0.005262728811989368, 'init_value': -0.8243666887283325, 'ave_value': -0.8549343243797467, 'soft_opc': nan} step=355




2022-04-20 17:27.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.28 [info     ] FQE_20220420172724: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.0001716492881237621, 'time_algorithm_update': 0.0050281430633974744, 'loss': 0.023416334881224264, 'time_step': 0.005275244779989753, 'init_value': -2.0287997722625732, 'ave_value': -2.0654987238242053, 'soft_opc': nan} step=710




2022-04-20 17:27.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.30 [info     ] FQE_20220420172724: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.0001677647442884848, 'time_algorithm_update': 0.004931942845734072, 'loss': 0.024847622304944923, 'time_step': 0.005174346037313972, 'init_value': -2.8378570079803467, 'ave_value': -2.8395189341891225, 'soft_opc': nan} step=1065




2022-04-20 17:27.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.32 [info     ] FQE_20220420172724: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001708890350771622, 'time_algorithm_update': 0.004834466584971254, 'loss': 0.030723333120031258, 'time_step': 0.005081276826455559, 'init_value': -3.8072478771209717, 'ave_value': -3.821807171778329, 'soft_opc': nan} step=1420




2022-04-20 17:27.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.34 [info     ] FQE_20220420172724: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00017173659633582748, 'time_algorithm_update': 0.005143800251920458, 'loss': 0.03746927620869287, 'time_step': 0.005388398237631355, 'init_value': -4.589534759521484, 'ave_value': -4.59140560629285, 'soft_opc': nan} step=1775




2022-04-20 17:27.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.36 [info     ] FQE_20220420172724: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017189845232896403, 'time_algorithm_update': 0.005082715397149744, 'loss': 0.048268625234634106, 'time_step': 0.005331838634652151, 'init_value': -5.494787693023682, 'ave_value': -5.527886461551892, 'soft_opc': nan} step=2130




2022-04-20 17:27.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.38 [info     ] FQE_20220420172724: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017091724234567562, 'time_algorithm_update': 0.005114711170465174, 'loss': 0.056451228727966964, 'time_step': 0.005359264158866775, 'init_value': -6.24296236038208, 'ave_value': -6.33347384438705, 'soft_opc': nan} step=2485




2022-04-20 17:27.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.40 [info     ] FQE_20220420172724: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001688809461996589, 'time_algorithm_update': 0.00464384052115427, 'loss': 0.06977026870500454, 'time_step': 0.00488671450547769, 'init_value': -6.998371601104736, 'ave_value': -7.164163875986404, 'soft_opc': nan} step=2840




2022-04-20 17:27.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.42 [info     ] FQE_20220420172724: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00015328366991499779, 'time_algorithm_update': 0.004998759820427693, 'loss': 0.08148908764123916, 'time_step': 0.0052201559845830355, 'init_value': -7.689839839935303, 'ave_value': -7.97507551417541, 'soft_opc': nan} step=3195




2022-04-20 17:27.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.44 [info     ] FQE_20220420172724: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.0001509659726854781, 'time_algorithm_update': 0.004881663389608893, 'loss': 0.09715018591673022, 'time_step': 0.005099056808041855, 'init_value': -8.380948066711426, 'ave_value': -8.732924097521348, 'soft_opc': nan} step=3550




2022-04-20 17:27.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.46 [info     ] FQE_20220420172724: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00015938852874326036, 'time_algorithm_update': 0.004970948796876719, 'loss': 0.11014181598496268, 'time_step': 0.00519386479552363, 'init_value': -9.220959663391113, 'ave_value': -9.805336230710399, 'soft_opc': nan} step=3905




2022-04-20 17:27.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.48 [info     ] FQE_20220420172724: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.0001743121885917556, 'time_algorithm_update': 0.00460052490234375, 'loss': 0.1271867732924055, 'time_step': 0.004849091382093833, 'init_value': -9.656211853027344, 'ave_value': -10.405407184431452, 'soft_opc': nan} step=4260




2022-04-20 17:27.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.50 [info     ] FQE_20220420172724: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016977686277577575, 'time_algorithm_update': 0.005015799025414695, 'loss': 0.14235953631430445, 'time_step': 0.005259436620792872, 'init_value': -10.5364990234375, 'ave_value': -11.359998473786584, 'soft_opc': nan} step=4615




2022-04-20 17:27.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.52 [info     ] FQE_20220420172724: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00017507647124814316, 'time_algorithm_update': 0.0050940661363198724, 'loss': 0.16309764566765705, 'time_step': 0.005347557470832072, 'init_value': -10.748812675476074, 'ave_value': -11.804746769906107, 'soft_opc': nan} step=4970




2022-04-20 17:27.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.54 [info     ] FQE_20220420172724: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001711643917459837, 'time_algorithm_update': 0.005072425788557025, 'loss': 0.17632616002794724, 'time_step': 0.0053187135239722025, 'init_value': -11.195701599121094, 'ave_value': -12.468273705397486, 'soft_opc': nan} step=5325




2022-04-20 17:27.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.56 [info     ] FQE_20220420172724: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017356939718756877, 'time_algorithm_update': 0.004875154898200237, 'loss': 0.1987848862218605, 'time_step': 0.005125514554305815, 'init_value': -11.422385215759277, 'ave_value': -13.034493957918755, 'soft_opc': nan} step=5680




2022-04-20 17:27.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:27.58 [info     ] FQE_20220420172724: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.0001705310714076942, 'time_algorithm_update': 0.004940775750388562, 'loss': 0.2129377865875271, 'time_step': 0.00518645165671765, 'init_value': -11.25428295135498, 'ave_value': -13.271098226641557, 'soft_opc': nan} step=6035




2022-04-20 17:27.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.00 [info     ] FQE_20220420172724: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00017062308083117848, 'time_algorithm_update': 0.005030636048652756, 'loss': 0.22749490731618774, 'time_step': 0.005274751152790768, 'init_value': -10.97474193572998, 'ave_value': -13.426494171130477, 'soft_opc': nan} step=6390




2022-04-20 17:28.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.02 [info     ] FQE_20220420172724: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001682805343413017, 'time_algorithm_update': 0.005088358194055692, 'loss': 0.24116588241197692, 'time_step': 0.005332201971134669, 'init_value': -11.269686698913574, 'ave_value': -14.117478016736596, 'soft_opc': nan} step=6745




2022-04-20 17:28.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.04 [info     ] FQE_20220420172724: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017110797720895686, 'time_algorithm_update': 0.005077398998636595, 'loss': 0.2589286354540939, 'time_step': 0.005323922466224348, 'init_value': -11.379457473754883, 'ave_value': -14.604360880698898, 'soft_opc': nan} step=7100




2022-04-20 17:28.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.06 [info     ] FQE_20220420172724: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00016720261372311015, 'time_algorithm_update': 0.00466389924707547, 'loss': 0.2741640011242158, 'time_step': 0.004905593227332746, 'init_value': -11.522900581359863, 'ave_value': -15.148613435315733, 'soft_opc': nan} step=7455




2022-04-20 17:28.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.08 [info     ] FQE_20220420172724: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00017192464479258361, 'time_algorithm_update': 0.005042108347718145, 'loss': 0.27853925875584845, 'time_step': 0.005292563371255364, 'init_value': -11.486506462097168, 'ave_value': -15.425691375792256, 'soft_opc': nan} step=7810




2022-04-20 17:28.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.10 [info     ] FQE_20220420172724: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001686868533282213, 'time_algorithm_update': 0.005100084358537701, 'loss': 0.2820014169570846, 'time_step': 0.005345097394056723, 'init_value': -11.611824035644531, 'ave_value': -15.875988294417699, 'soft_opc': nan} step=8165




2022-04-20 17:28.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.12 [info     ] FQE_20220420172724: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017304286150865151, 'time_algorithm_update': 0.005086628148253535, 'loss': 0.28268536894883906, 'time_step': 0.005337083843392386, 'init_value': -11.755769729614258, 'ave_value': -16.233717269539124, 'soft_opc': nan} step=8520




2022-04-20 17:28.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.14 [info     ] FQE_20220420172724: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016828523555272062, 'time_algorithm_update': 0.004622472843653719, 'loss': 0.2868403875334582, 'time_step': 0.0048657954578668295, 'init_value': -12.112452507019043, 'ave_value': -16.86846809331064, 'soft_opc': nan} step=8875




2022-04-20 17:28.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.16 [info     ] FQE_20220420172724: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017297771615041812, 'time_algorithm_update': 0.005063124105963908, 'loss': 0.2878411445084592, 'time_step': 0.00531191691546373, 'init_value': -12.422347068786621, 'ave_value': -17.57169814288875, 'soft_opc': nan} step=9230




2022-04-20 17:28.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.18 [info     ] FQE_20220420172724: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001706271104409661, 'time_algorithm_update': 0.005062486756015831, 'loss': 0.2858288016537545, 'time_step': 0.0053089934335628025, 'init_value': -12.769553184509277, 'ave_value': -18.11229808659867, 'soft_opc': nan} step=9585




2022-04-20 17:28.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.20 [info     ] FQE_20220420172724: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017483267985599142, 'time_algorithm_update': 0.005129806760331275, 'loss': 0.2904367731910356, 'time_step': 0.005379764127059722, 'init_value': -12.773916244506836, 'ave_value': -18.412729243054613, 'soft_opc': nan} step=9940




2022-04-20 17:28.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.22 [info     ] FQE_20220420172724: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.0001687748331419179, 'time_algorithm_update': 0.005031155868315361, 'loss': 0.2952344700694084, 'time_step': 0.0052771581730372465, 'init_value': -13.215219497680664, 'ave_value': -18.93540749735411, 'soft_opc': nan} step=10295




2022-04-20 17:28.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.24 [info     ] FQE_20220420172724: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017213082649338414, 'time_algorithm_update': 0.004680312519342127, 'loss': 0.29803703373796503, 'time_step': 0.0049256768025143045, 'init_value': -13.16407299041748, 'ave_value': -19.010485538571324, 'soft_opc': nan} step=10650




2022-04-20 17:28.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.26 [info     ] FQE_20220420172724: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016809517229107065, 'time_algorithm_update': 0.005149743926357216, 'loss': 0.2944163923794535, 'time_step': 0.005393461442329515, 'init_value': -13.867231369018555, 'ave_value': -19.796664680794187, 'soft_opc': nan} step=11005




2022-04-20 17:28.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.28 [info     ] FQE_20220420172724: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017372185075786753, 'time_algorithm_update': 0.0050556747006698394, 'loss': 0.28578377071190886, 'time_step': 0.005305204257159166, 'init_value': -13.778773307800293, 'ave_value': -19.901164302366524, 'soft_opc': nan} step=11360




2022-04-20 17:28.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.30 [info     ] FQE_20220420172724: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016736178330972162, 'time_algorithm_update': 0.005009164272899359, 'loss': 0.27687405590755, 'time_step': 0.00525165275788643, 'init_value': -13.820520401000977, 'ave_value': -20.04480181152344, 'soft_opc': nan} step=11715




2022-04-20 17:28.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.32 [info     ] FQE_20220420172724: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.0001708675438249615, 'time_algorithm_update': 0.004649124682789118, 'loss': 0.27106625473415347, 'time_step': 0.004892148434276312, 'init_value': -14.071418762207031, 'ave_value': -20.340500544945073, 'soft_opc': nan} step=12070




2022-04-20 17:28.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.34 [info     ] FQE_20220420172724: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017254251829335388, 'time_algorithm_update': 0.005080757678394586, 'loss': 0.2709853564454636, 'time_step': 0.005329161630549901, 'init_value': -14.580428123474121, 'ave_value': -20.820673045257113, 'soft_opc': nan} step=12425




2022-04-20 17:28.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.36 [info     ] FQE_20220420172724: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017371177673339844, 'time_algorithm_update': 0.005019791025510976, 'loss': 0.279912590922814, 'time_step': 0.0052701130719252035, 'init_value': -14.959802627563477, 'ave_value': -21.185032693335266, 'soft_opc': nan} step=12780




2022-04-20 17:28.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.38 [info     ] FQE_20220420172724: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016664652757241694, 'time_algorithm_update': 0.005041104974881024, 'loss': 0.27822303540060217, 'time_step': 0.0052819769147416235, 'init_value': -15.367605209350586, 'ave_value': -21.605796942204726, 'soft_opc': nan} step=13135




2022-04-20 17:28.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.40 [info     ] FQE_20220420172724: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016896422480193663, 'time_algorithm_update': 0.004598827765021526, 'loss': 0.2851009591731807, 'time_step': 0.004844302862462863, 'init_value': -15.447198867797852, 'ave_value': -21.631288632687465, 'soft_opc': nan} step=13490




2022-04-20 17:28.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.42 [info     ] FQE_20220420172724: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017156936752964072, 'time_algorithm_update': 0.0050778046460218836, 'loss': 0.28776783246914267, 'time_step': 0.005324949345118563, 'init_value': -15.788092613220215, 'ave_value': -21.954581407878305, 'soft_opc': nan} step=13845




2022-04-20 17:28.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.44 [info     ] FQE_20220420172724: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00017277153444961762, 'time_algorithm_update': 0.005073574227346501, 'loss': 0.29395016650079003, 'time_step': 0.005323078262973839, 'init_value': -15.917380332946777, 'ave_value': -22.1385156492433, 'soft_opc': nan} step=14200




2022-04-20 17:28.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.46 [info     ] FQE_20220420172724: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017066069052252972, 'time_algorithm_update': 0.0050294479853670365, 'loss': 0.28686288998160564, 'time_step': 0.0052744563196746395, 'init_value': -16.01983642578125, 'ave_value': -22.208203509216947, 'soft_opc': nan} step=14555




2022-04-20 17:28.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.48 [info     ] FQE_20220420172724: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00017340418318627585, 'time_algorithm_update': 0.005003704151637117, 'loss': 0.3122730289021848, 'time_step': 0.005253044316466425, 'init_value': -16.627840042114258, 'ave_value': -22.76355757885175, 'soft_opc': nan} step=14910




2022-04-20 17:28.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.49 [info     ] FQE_20220420172724: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016567673481686016, 'time_algorithm_update': 0.004627432621700663, 'loss': 0.3313778500164479, 'time_step': 0.004867662510401766, 'init_value': -17.055416107177734, 'ave_value': -23.238782104937314, 'soft_opc': nan} step=15265




2022-04-20 17:28.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.51 [info     ] FQE_20220420172724: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017225843080332582, 'time_algorithm_update': 0.005099338880726989, 'loss': 0.3446725734889927, 'time_step': 0.005349750250158176, 'init_value': -17.346349716186523, 'ave_value': -23.38577663319993, 'soft_opc': nan} step=15620




2022-04-20 17:28.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.53 [info     ] FQE_20220420172724: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016724626782914283, 'time_algorithm_update': 0.0050034784934890105, 'loss': 0.3588085642005776, 'time_step': 0.005246766856018926, 'init_value': -17.770008087158203, 'ave_value': -23.65060543477957, 'soft_opc': nan} step=15975




2022-04-20 17:28.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.55 [info     ] FQE_20220420172724: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017304554791517662, 'time_algorithm_update': 0.005008978239247497, 'loss': 0.3811602314855431, 'time_step': 0.005257431889923525, 'init_value': -18.339906692504883, 'ave_value': -24.17081230641265, 'soft_opc': nan} step=16330




2022-04-20 17:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.57 [info     ] FQE_20220420172724: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017001393815161477, 'time_algorithm_update': 0.0045925086652728875, 'loss': 0.3963254846947294, 'time_step': 0.0048374908071168714, 'init_value': -18.466777801513672, 'ave_value': -24.123219195849945, 'soft_opc': nan} step=16685




2022-04-20 17:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:28.59 [info     ] FQE_20220420172724: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00017549622226768817, 'time_algorithm_update': 0.005066381373875578, 'loss': 0.4167402910409679, 'time_step': 0.005318817622225049, 'init_value': -19.085494995117188, 'ave_value': -24.607353782968566, 'soft_opc': nan} step=17040




2022-04-20 17:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:29.01 [info     ] FQE_20220420172724: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016638460293622083, 'time_algorithm_update': 0.004983404992331921, 'loss': 0.4521506747414529, 'time_step': 0.005225934445018499, 'init_value': -19.700008392333984, 'ave_value': -25.18355638702883, 'soft_opc': nan} step=17395




2022-04-20 17:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:29.03 [info     ] FQE_20220420172724: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017252639985420334, 'time_algorithm_update': 0.00503571201378191, 'loss': 0.4688849647857354, 'time_step': 0.005284476616013218, 'init_value': -20.262847900390625, 'ave_value': -25.647030877431273, 'soft_opc': nan} step=17750




2022-04-20 17:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172724/model_17750.pt
search iteration:  24
using hyper params:  [0.007980300655030288, 0.005527527732912218, 6.912609868756439e-05, 7]
2022-04-20 17:29.03 [debug    ] RoundIterator is selected.
2022-04-20 17:29.03 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420172903
2022-04-20 17:29.03 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:29.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:29.04 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:29.04 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00798030065503

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.07 [info     ] TD3PlusBC_20220420172903: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00039230313217430784, 'time_algorithm_update': 0.0083848844494736, 'critic_loss': 11.227689847960109, 'actor_loss': 2.6601932620444493, 'time_step': 0.00885737639421608, 'td_error': 1.0323183201320698, 'init_value': -11.107782363891602, 'ave_value': -7.109818081681424} step=342
2022-04-20 17:29.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.11 [info     ] TD3PlusBC_20220420172903: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003968107770060935, 'time_algorithm_update': 0.00890268289554886, 'critic_loss': 6.209210504216758, 'actor_loss': 2.578299389945136, 'time_step': 0.009380021987602724, 'td_error': 1.3016545394830865, 'init_value': -15.81140422821045, 'ave_value': -10.164965678742826} step=684
2022-04-20 17:29.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.15 [info     ] TD3PlusBC_20220420172903: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.000391932955959387, 'time_algorithm_update': 0.008875128818534271, 'critic_loss': 9.757052880281593, 'actor_loss': 2.568344597230878, 'time_step': 0.009346497686285721, 'td_error': 1.7069931917460008, 'init_value': -20.96755599975586, 'ave_value': -13.50779036347318} step=1026
2022-04-20 17:29.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.18 [info     ] TD3PlusBC_20220420172903: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00040017721945779367, 'time_algorithm_update': 0.008514172152469033, 'critic_loss': 14.013751262809798, 'actor_loss': 2.565875460529885, 'time_step': 0.008989304826970686, 'td_error': 2.1548169147095413, 'init_value': -25.6917724609375, 'ave_value': -16.72227153730369} step=1368
2022-04-20 17:29.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.22 [info     ] TD3PlusBC_20220420172903: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00039878295876129326, 'time_algorithm_update': 0.008889600547433596, 'critic_loss': 18.663866111409593, 'actor_loss': 2.5636122143059445, 'time_step': 0.009363281099419845, 'td_error': 2.7858951853712384, 'init_value': -30.73027992248535, 'ave_value': -20.014856044674325} step=1710
2022-04-20 17:29.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.26 [info     ] TD3PlusBC_20220420172903: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003897523322300604, 'time_algorithm_update': 0.008390978065847654, 'critic_loss': 23.85677271000823, 'actor_loss': 2.5636351052780597, 'time_step': 0.008851955508628087, 'td_error': 3.3666217287050237, 'init_value': -35.19743728637695, 'ave_value': -23.12765784541296} step=2052
2022-04-20 17:29.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.29 [info     ] TD3PlusBC_20220420172903: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00039659048381604647, 'time_algorithm_update': 0.008915514973869101, 'critic_loss': 28.793785583206088, 'actor_loss': 2.561786132946349, 'time_step': 0.009389198314376741, 'td_error': 3.867749030919927, 'init_value': -38.828102111816406, 'ave_value': -25.726237876353583} step=2394
2022-04-20 17:29.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.33 [info     ] TD3PlusBC_20220420172903: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00039734477885285314, 'time_algorithm_update': 0.008740388859085173, 'critic_loss': 34.38107020394844, 'actor_loss': 2.561314817060504, 'time_step': 0.00921208189244856, 'td_error': 4.263170395530271, 'init_value': -42.12565994262695, 'ave_value': -28.248454447489248} step=2736
2022-04-20 17:29.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.36 [info     ] TD3PlusBC_20220420172903: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00039581039495635453, 'time_algorithm_update': 0.008370614888375266, 'critic_loss': 40.03292339726498, 'actor_loss': 2.559710253051847, 'time_step': 0.008842901179665014, 'td_error': 5.039514653413991, 'init_value': -47.94339370727539, 'ave_value': -31.5871107509426} step=3078
2022-04-20 17:29.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.40 [info     ] TD3PlusBC_20220420172903: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003981060451931424, 'time_algorithm_update': 0.008793208334181044, 'critic_loss': 45.90913712350946, 'actor_loss': 2.5589361121082863, 'time_step': 0.00926671878636232, 'td_error': 5.541665440123448, 'init_value': -51.626197814941406, 'ave_value': -34.08107759316099} step=3420
2022-04-20 17:29.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.44 [info     ] TD3PlusBC_20220420172903: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00039844345628169543, 'time_algorithm_update': 0.00841482828932199, 'critic_loss': 52.150529504519454, 'actor_loss': 2.5580382305279112, 'time_step': 0.008890384819075378, 'td_error': 5.94435099715619, 'init_value': -53.908599853515625, 'ave_value': -35.82888506651361} step=3762
2022-04-20 17:29.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.47 [info     ] TD3PlusBC_20220420172903: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003953440147533751, 'time_algorithm_update': 0.008829813254506965, 'critic_loss': 58.096350563897026, 'actor_loss': 2.558247683341043, 'time_step': 0.009299540380288285, 'td_error': 6.526365658647381, 'init_value': -57.603424072265625, 'ave_value': -38.36243922306334} step=4104
2022-04-20 17:29.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.51 [info     ] TD3PlusBC_20220420172903: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039043900562308687, 'time_algorithm_update': 0.008730989450599715, 'critic_loss': 64.57024116962276, 'actor_loss': 2.558172545237848, 'time_step': 0.009196170589380097, 'td_error': 7.266075039681216, 'init_value': -61.90899658203125, 'ave_value': -40.758113639293455} step=4446
2022-04-20 17:29.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.55 [info     ] TD3PlusBC_20220420172903: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00039661209485684223, 'time_algorithm_update': 0.00843138931787502, 'critic_loss': 70.31969215437682, 'actor_loss': 2.55869105824253, 'time_step': 0.008905150736981665, 'td_error': 7.895249060940835, 'init_value': -64.80621337890625, 'ave_value': -42.68883991903516} step=4788
2022-04-20 17:29.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.58 [info     ] TD3PlusBC_20220420172903: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00039921029966477064, 'time_algorithm_update': 0.00878883523550647, 'critic_loss': 76.44131290145785, 'actor_loss': 2.558017559218825, 'time_step': 0.009262707498338487, 'td_error': 8.107962730063516, 'init_value': -64.64811706542969, 'ave_value': -43.96391472869343} step=5130
2022-04-20 17:29.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.02 [info     ] TD3PlusBC_20220420172903: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003951160531294973, 'time_algorithm_update': 0.008466443820306433, 'critic_loss': 82.49973106384277, 'actor_loss': 2.5593737956376104, 'time_step': 0.00893943072759617, 'td_error': 8.471517481043124, 'init_value': -67.20877838134766, 'ave_value': -45.65864700428101} step=5472
2022-04-20 17:30.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.06 [info     ] TD3PlusBC_20220420172903: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003969766940289771, 'time_algorithm_update': 0.00886888253061395, 'critic_loss': 88.53373434947945, 'actor_loss': 2.5586569783283255, 'time_step': 0.009342946504291735, 'td_error': 9.318631881159831, 'init_value': -70.0099868774414, 'ave_value': -47.95314204577357} step=5814
2022-04-20 17:30.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.09 [info     ] TD3PlusBC_20220420172903: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003998962759274488, 'time_algorithm_update': 0.008873779871310408, 'critic_loss': 94.47679925662035, 'actor_loss': 2.558561187041433, 'time_step': 0.009352492310150324, 'td_error': 9.464038873813267, 'init_value': -71.82144927978516, 'ave_value': -49.517509504953686} step=6156
2022-04-20 17:30.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.13 [info     ] TD3PlusBC_20220420172903: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003953224037125794, 'time_algorithm_update': 0.008399707532068442, 'critic_loss': 100.65059140969439, 'actor_loss': 2.5587440359662152, 'time_step': 0.008862266763609055, 'td_error': 10.125771909083214, 'init_value': -71.99781799316406, 'ave_value': -50.51832848292381} step=6498
2022-04-20 17:30.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.17 [info     ] TD3PlusBC_20220420172903: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00039550226334242794, 'time_algorithm_update': 0.008774535697803162, 'critic_loss': 105.6852602150008, 'actor_loss': 2.559244896236219, 'time_step': 0.009228822780631439, 'td_error': 10.383109742171984, 'init_value': -73.7822494506836, 'ave_value': -52.12210491784152} step=6840
2022-04-20 17:30.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.20 [info     ] TD3PlusBC_20220420172903: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00040012354042097837, 'time_algorithm_update': 0.008375807115209032, 'critic_loss': 111.55762414764939, 'actor_loss': 2.560875808983518, 'time_step': 0.008840717767414293, 'td_error': 11.381542518732617, 'init_value': -75.5026626586914, 'ave_value': -53.30816573060864} step=7182
2022-04-20 17:30.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.24 [info     ] TD3PlusBC_20220420172903: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00039407593464990806, 'time_algorithm_update': 0.008838395626224273, 'critic_loss': 117.02562557465849, 'actor_loss': 2.560863995412637, 'time_step': 0.009294320965370936, 'td_error': 11.764795702135006, 'init_value': -76.6583251953125, 'ave_value': -54.58822385220847} step=7524
2022-04-20 17:30.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.28 [info     ] TD3PlusBC_20220420172903: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039704013289066785, 'time_algorithm_update': 0.008882538617005822, 'critic_loss': 122.03683554219921, 'actor_loss': 2.5612680410083972, 'time_step': 0.009345394128944442, 'td_error': 11.909408859010911, 'init_value': -77.63069915771484, 'ave_value': -55.70936175486761} step=7866
2022-04-20 17:30.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.31 [info     ] TD3PlusBC_20220420172903: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00039838001742000467, 'time_algorithm_update': 0.008505004888389543, 'critic_loss': 126.4890462194967, 'actor_loss': 2.5623789656231977, 'time_step': 0.008966010216383906, 'td_error': 12.216432353505699, 'init_value': -79.27790832519531, 'ave_value': -57.03166492136145} step=8208
2022-04-20 17:30.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.35 [info     ] TD3PlusBC_20220420172903: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00039557197637725297, 'time_algorithm_update': 0.008793819020366111, 'critic_loss': 131.06204283864875, 'actor_loss': 2.561630145848146, 'time_step': 0.009253384774191338, 'td_error': 12.05741871671216, 'init_value': -78.02459716796875, 'ave_value': -57.432455698578444} step=8550
2022-04-20 17:30.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.38 [info     ] TD3PlusBC_20220420172903: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00039597910050063106, 'time_algorithm_update': 0.008520921071370443, 'critic_loss': 135.15214123642235, 'actor_loss': 2.5617640799249126, 'time_step': 0.008980691781518056, 'td_error': 12.898073621083936, 'init_value': -81.18843078613281, 'ave_value': -59.381915109282744} step=8892
2022-04-20 17:30.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.42 [info     ] TD3PlusBC_20220420172903: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00040071331269559806, 'time_algorithm_update': 0.008826365944934867, 'critic_loss': 138.68106070178294, 'actor_loss': 2.5621454687843546, 'time_step': 0.009295672003985846, 'td_error': 12.040177132867356, 'init_value': -78.7064437866211, 'ave_value': -59.244367320833575} step=9234
2022-04-20 17:30.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.46 [info     ] TD3PlusBC_20220420172903: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.000396664379632961, 'time_algorithm_update': 0.008805553118387857, 'critic_loss': 142.0310986390588, 'actor_loss': 2.5625982005693757, 'time_step': 0.009265320342883729, 'td_error': 12.375557632249587, 'init_value': -81.283447265625, 'ave_value': -60.343585281021525} step=9576
2022-04-20 17:30.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.49 [info     ] TD3PlusBC_20220420172903: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00038792863923903795, 'time_algorithm_update': 0.00839844154335602, 'critic_loss': 145.6674428125571, 'actor_loss': 2.5634017213743334, 'time_step': 0.008847698830721671, 'td_error': 13.40955209384738, 'init_value': -80.13505554199219, 'ave_value': -61.01281361115509} step=9918
2022-04-20 17:30.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.53 [info     ] TD3PlusBC_20220420172903: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003973907894558377, 'time_algorithm_update': 0.008799032161110327, 'critic_loss': 148.001668316579, 'actor_loss': 2.5624846826519883, 'time_step': 0.009259478390565392, 'td_error': 14.179761812961267, 'init_value': -80.38826751708984, 'ave_value': -61.845772171873634} step=10260
2022-04-20 17:30.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.57 [info     ] TD3PlusBC_20220420172903: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0004010416610896239, 'time_algorithm_update': 0.008890345779775877, 'critic_loss': 150.72993049844663, 'actor_loss': 2.5631270338917336, 'time_step': 0.009360341300741273, 'td_error': 13.05758672685556, 'init_value': -77.91690826416016, 'ave_value': -61.91406776592773} step=10602
2022-04-20 17:30.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.00 [info     ] TD3PlusBC_20220420172903: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003970031849822106, 'time_algorithm_update': 0.00892995324051171, 'critic_loss': 153.51657178109153, 'actor_loss': 2.5628575581556174, 'time_step': 0.009392217585915014, 'td_error': 13.836301336614557, 'init_value': -80.57470703125, 'ave_value': -63.03393562191409} step=10944
2022-04-20 17:31.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.04 [info     ] TD3PlusBC_20220420172903: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00039638343610261614, 'time_algorithm_update': 0.008863631047700582, 'critic_loss': 155.16430606061255, 'actor_loss': 2.5629661362073577, 'time_step': 0.00932645867442527, 'td_error': 12.371085488474593, 'init_value': -78.83769989013672, 'ave_value': -62.93482871228846} step=11286
2022-04-20 17:31.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.08 [info     ] TD3PlusBC_20220420172903: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003916624693842659, 'time_algorithm_update': 0.008476941209090384, 'critic_loss': 157.56504016452365, 'actor_loss': 2.5638152590969154, 'time_step': 0.008928162312647056, 'td_error': 15.644158015477435, 'init_value': -86.02847290039062, 'ave_value': -65.41097252577896} step=11628
2022-04-20 17:31.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.11 [info     ] TD3PlusBC_20220420172903: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00039736081285086293, 'time_algorithm_update': 0.008740251524406568, 'critic_loss': 159.01563950031124, 'actor_loss': 2.5639291785613834, 'time_step': 0.00920263507909942, 'td_error': 14.703777310060786, 'init_value': -82.8282699584961, 'ave_value': -65.07148393874753} step=11970
2022-04-20 17:31.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.15 [info     ] TD3PlusBC_20220420172903: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00039550017195138316, 'time_algorithm_update': 0.008798431931880483, 'critic_loss': 160.85208758973238, 'actor_loss': 2.5640788092250713, 'time_step': 0.009259778853745489, 'td_error': 14.439633309843382, 'init_value': -82.04439544677734, 'ave_value': -65.58065798923072} step=12312
2022-04-20 17:31.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.19 [info     ] TD3PlusBC_20220420172903: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003912107289185998, 'time_algorithm_update': 0.008736572070428503, 'critic_loss': 162.23926573190076, 'actor_loss': 2.5643353978095695, 'time_step': 0.009192147450140345, 'td_error': 15.396581486994176, 'init_value': -81.30924224853516, 'ave_value': -65.9693471392657} step=12654
2022-04-20 17:31.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.22 [info     ] TD3PlusBC_20220420172903: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00039917962592944764, 'time_algorithm_update': 0.0089163919638472, 'critic_loss': 163.50898300973992, 'actor_loss': 2.5645942032685753, 'time_step': 0.00937806374845449, 'td_error': 16.00953700579219, 'init_value': -84.91883850097656, 'ave_value': -66.91241906898499} step=12996
2022-04-20 17:31.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.26 [info     ] TD3PlusBC_20220420172903: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003947598195215415, 'time_algorithm_update': 0.00850404563703035, 'critic_loss': 164.9717289216337, 'actor_loss': 2.5648181145651296, 'time_step': 0.008960624884443673, 'td_error': 15.42868780922927, 'init_value': -83.56370544433594, 'ave_value': -67.19742335862556} step=13338
2022-04-20 17:31.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.30 [info     ] TD3PlusBC_20220420172903: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00039382705911558273, 'time_algorithm_update': 0.008712487611157156, 'critic_loss': 166.31079275566236, 'actor_loss': 2.5649268404085035, 'time_step': 0.009175802531995271, 'td_error': 14.978920894221778, 'init_value': -81.89558410644531, 'ave_value': -67.01731837600587} step=13680
2022-04-20 17:31.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.33 [info     ] TD3PlusBC_20220420172903: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0004037995087473016, 'time_algorithm_update': 0.0088124484346624, 'critic_loss': 167.39349936323558, 'actor_loss': 2.5648773329996923, 'time_step': 0.00928678986621879, 'td_error': 15.55733838702003, 'init_value': -84.0735092163086, 'ave_value': -67.75606633981187} step=14022
2022-04-20 17:31.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.37 [info     ] TD3PlusBC_20220420172903: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00039754276387175624, 'time_algorithm_update': 0.008473263149373016, 'critic_loss': 167.96299632111487, 'actor_loss': 2.564986656980905, 'time_step': 0.008936921755472818, 'td_error': 16.20438399860246, 'init_value': -83.10678100585938, 'ave_value': -67.75969547268708} step=14364
2022-04-20 17:31.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.41 [info     ] TD3PlusBC_20220420172903: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00040135258122494345, 'time_algorithm_update': 0.008846138653002288, 'critic_loss': 168.9995656152915, 'actor_loss': 2.565096580494217, 'time_step': 0.009314032325967711, 'td_error': 15.8965466864453, 'init_value': -84.50495910644531, 'ave_value': -68.45062468168435} step=14706
2022-04-20 17:31.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.44 [info     ] TD3PlusBC_20220420172903: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00039876831902398006, 'time_algorithm_update': 0.008388338730349177, 'critic_loss': 170.34194009345873, 'actor_loss': 2.5659869944143017, 'time_step': 0.008854532102395219, 'td_error': 17.574058383838047, 'init_value': -82.1364517211914, 'ave_value': -68.54789383335073} step=15048
2022-04-20 17:31.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.48 [info     ] TD3PlusBC_20220420172903: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00039701573332847907, 'time_algorithm_update': 0.008934413480479814, 'critic_loss': 171.28717250712432, 'actor_loss': 2.5669443175109508, 'time_step': 0.009396035071702032, 'td_error': 17.82800103958791, 'init_value': -83.4766616821289, 'ave_value': -68.78074374607655} step=15390
2022-04-20 17:31.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.52 [info     ] TD3PlusBC_20220420172903: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00040318254838910017, 'time_algorithm_update': 0.008812988710682295, 'critic_loss': 171.37320858693263, 'actor_loss': 2.5663855717196102, 'time_step': 0.009279452569303457, 'td_error': 16.810783793126962, 'init_value': -83.63917541503906, 'ave_value': -68.93953928572822} step=15732
2022-04-20 17:31.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.55 [info     ] TD3PlusBC_20220420172903: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00039379847677130446, 'time_algorithm_update': 0.008311949278178968, 'critic_loss': 172.41135158873442, 'actor_loss': 2.567000080967507, 'time_step': 0.008771761119017127, 'td_error': 16.115534584536054, 'init_value': -83.30104064941406, 'ave_value': -69.33268108137447} step=16074
2022-04-20 17:31.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.59 [info     ] TD3PlusBC_20220420172903: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003999778401781941, 'time_algorithm_update': 0.008779752324199119, 'critic_loss': 172.71446629574424, 'actor_loss': 2.566281359098111, 'time_step': 0.009239760755795485, 'td_error': 16.731978778347354, 'init_value': -83.77376556396484, 'ave_value': -69.7493476356237} step=16416
2022-04-20 17:31.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.02 [info     ] TD3PlusBC_20220420172903: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00040073422660604554, 'time_algorithm_update': 0.008358285441036112, 'critic_loss': 173.33757570054797, 'actor_loss': 2.5673364798227944, 'time_step': 0.008822557521842377, 'td_error': 17.192222155512177, 'init_value': -82.70463562011719, 'ave_value': -69.26786824692493} step=16758
2022-04-20 17:32.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.06 [info     ] TD3PlusBC_20220420172903: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003931236545941983, 'time_algorithm_update': 0.008931212955050997, 'critic_loss': 173.75025446930823, 'actor_loss': 2.567581399839524, 'time_step': 0.009387115986026519, 'td_error': 16.220827017012788, 'init_value': -80.89338684082031, 'ave_value': -69.60839807149921} step=17100
2022-04-20 17:32.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420172903/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:32.07 [info     ] FQE_20220420173206: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016323342380753482, 'time_algorithm_update': 0.00490830605288586, 'loss': 0.007701574345063193, 'time_step': 0.005144093410078302, 'init_value': -0.10485000163316727, 'ave_value': -0.08680417505512433, 'soft_opc': nan} step=166




2022-04-20 17:32.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.08 [info     ] FQE_20220420173206: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016088083565953267, 'time_algorithm_update': 0.004958148462226592, 'loss': 0.005943750909316432, 'time_step': 0.005193229181220733, 'init_value': -0.23053672909736633, 'ave_value': -0.15243355834915362, 'soft_opc': nan} step=332




2022-04-20 17:32.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.09 [info     ] FQE_20220420173206: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015916307288480093, 'time_algorithm_update': 0.0051059722900390625, 'loss': 0.004929395250875099, 'time_step': 0.005337223949202572, 'init_value': -0.3198234438896179, 'ave_value': -0.20999278739925373, 'soft_opc': nan} step=498




2022-04-20 17:32.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.10 [info     ] FQE_20220420173206: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016184025500194137, 'time_algorithm_update': 0.004932870347815824, 'loss': 0.004758738685421735, 'time_step': 0.0051669327609510304, 'init_value': -0.44321006536483765, 'ave_value': -0.2774480624099237, 'soft_opc': nan} step=664




2022-04-20 17:32.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.11 [info     ] FQE_20220420173206: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015573760113084172, 'time_algorithm_update': 0.004035981304674263, 'loss': 0.004463632306342114, 'time_step': 0.004260350422686841, 'init_value': -0.553390383720398, 'ave_value': -0.34316604758176517, 'soft_opc': nan} step=830




2022-04-20 17:32.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.12 [info     ] FQE_20220420173206: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001610546226961067, 'time_algorithm_update': 0.005107833678463855, 'loss': 0.004135285835174552, 'time_step': 0.005341075989137213, 'init_value': -0.6012216806411743, 'ave_value': -0.3578425122299106, 'soft_opc': nan} step=996




2022-04-20 17:32.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.13 [info     ] FQE_20220420173206: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016366142824471714, 'time_algorithm_update': 0.00501197217458702, 'loss': 0.004063525915998651, 'time_step': 0.005247525421969862, 'init_value': -0.6820968389511108, 'ave_value': -0.3899806826106936, 'soft_opc': nan} step=1162




2022-04-20 17:32.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.14 [info     ] FQE_20220420173206: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015729163066450372, 'time_algorithm_update': 0.00503936876733619, 'loss': 0.003795563882912498, 'time_step': 0.005270472492080137, 'init_value': -0.7887839674949646, 'ave_value': -0.45343782124654936, 'soft_opc': nan} step=1328




2022-04-20 17:32.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.15 [info     ] FQE_20220420173206: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001591099313942783, 'time_algorithm_update': 0.005015366048697966, 'loss': 0.0037782205302402914, 'time_step': 0.005247045712298657, 'init_value': -0.8714849948883057, 'ave_value': -0.48940965172515866, 'soft_opc': nan} step=1494




2022-04-20 17:32.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.16 [info     ] FQE_20220420173206: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016274653285382743, 'time_algorithm_update': 0.0050400682242519885, 'loss': 0.00382192425236263, 'time_step': 0.0052742211215467336, 'init_value': -0.9866795539855957, 'ave_value': -0.546207270998642, 'soft_opc': nan} step=1660




2022-04-20 17:32.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.17 [info     ] FQE_20220420173206: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016793860010353914, 'time_algorithm_update': 0.004990717014634466, 'loss': 0.003780495373992227, 'time_step': 0.005232711872422552, 'init_value': -1.0871098041534424, 'ave_value': -0.6093709724495893, 'soft_opc': nan} step=1826




2022-04-20 17:32.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.18 [info     ] FQE_20220420173206: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016022446643875306, 'time_algorithm_update': 0.00505281930946442, 'loss': 0.003779494979261724, 'time_step': 0.005289337721215673, 'init_value': -1.146907091140747, 'ave_value': -0.6398313847580203, 'soft_opc': nan} step=1992




2022-04-20 17:32.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.19 [info     ] FQE_20220420173206: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016275945916233292, 'time_algorithm_update': 0.0050301953970667826, 'loss': 0.0039458282709862276, 'time_step': 0.005268239113221686, 'init_value': -1.202202320098877, 'ave_value': -0.6609222601545421, 'soft_opc': nan} step=2158




2022-04-20 17:32.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.19 [info     ] FQE_20220420173206: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001643350325435041, 'time_algorithm_update': 0.0041607905583209304, 'loss': 0.003943001570920628, 'time_step': 0.004398558513227716, 'init_value': -1.315676212310791, 'ave_value': -0.7340665685193328, 'soft_opc': nan} step=2324




2022-04-20 17:32.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.20 [info     ] FQE_20220420173206: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001642861998224833, 'time_algorithm_update': 0.004996838339840074, 'loss': 0.004135273739903133, 'time_step': 0.005236846854887813, 'init_value': -1.3687098026275635, 'ave_value': -0.7638582875266218, 'soft_opc': nan} step=2490




2022-04-20 17:32.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.21 [info     ] FQE_20220420173206: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016471564051616624, 'time_algorithm_update': 0.004996881427535091, 'loss': 0.00457566465091252, 'time_step': 0.0052379901150623, 'init_value': -1.4578592777252197, 'ave_value': -0.8152315769583691, 'soft_opc': nan} step=2656




2022-04-20 17:32.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.22 [info     ] FQE_20220420173206: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001648319772927158, 'time_algorithm_update': 0.0050121962306011155, 'loss': 0.0045325639973259925, 'time_step': 0.005250482674104622, 'init_value': -1.5653457641601562, 'ave_value': -0.8925369901775575, 'soft_opc': nan} step=2822




2022-04-20 17:32.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.23 [info     ] FQE_20220420173206: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001598539122615952, 'time_algorithm_update': 0.0049350606389792565, 'loss': 0.004915735114881971, 'time_step': 0.005165979086634624, 'init_value': -1.62396240234375, 'ave_value': -0.9282284599464589, 'soft_opc': nan} step=2988




2022-04-20 17:32.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.24 [info     ] FQE_20220420173206: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016289590352989105, 'time_algorithm_update': 0.005048784864954202, 'loss': 0.005451050070843783, 'time_step': 0.005283355712890625, 'init_value': -1.7384692430496216, 'ave_value': -1.0111498394200729, 'soft_opc': nan} step=3154




2022-04-20 17:32.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.25 [info     ] FQE_20220420173206: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015905678990375563, 'time_algorithm_update': 0.005001329513917486, 'loss': 0.005548285749595028, 'time_step': 0.005235644708196801, 'init_value': -1.7885938882827759, 'ave_value': -1.0543391405038436, 'soft_opc': nan} step=3320




2022-04-20 17:32.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.26 [info     ] FQE_20220420173206: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016282552696136106, 'time_algorithm_update': 0.004973229155483016, 'loss': 0.006017536301858701, 'time_step': 0.005213394222489323, 'init_value': -1.8700830936431885, 'ave_value': -1.1206817724905607, 'soft_opc': nan} step=3486




2022-04-20 17:32.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.27 [info     ] FQE_20220420173206: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001713813069355057, 'time_algorithm_update': 0.004997105483549187, 'loss': 0.006122708044306611, 'time_step': 0.005245597965746041, 'init_value': -1.9446204900741577, 'ave_value': -1.1604488149000047, 'soft_opc': nan} step=3652




2022-04-20 17:32.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.28 [info     ] FQE_20220420173206: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016280972813985435, 'time_algorithm_update': 0.004401224205292851, 'loss': 0.006737434430782829, 'time_step': 0.004634478006018214, 'init_value': -2.063707113265991, 'ave_value': -1.271843908490353, 'soft_opc': nan} step=3818




2022-04-20 17:32.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.29 [info     ] FQE_20220420173206: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001616377428353551, 'time_algorithm_update': 0.005017241799687764, 'loss': 0.0070204932059593364, 'time_step': 0.0052528496248176295, 'init_value': -2.1233272552490234, 'ave_value': -1.3024056902037815, 'soft_opc': nan} step=3984




2022-04-20 17:32.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.30 [info     ] FQE_20220420173206: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015954511711396366, 'time_algorithm_update': 0.005051628652825413, 'loss': 0.007121097908007452, 'time_step': 0.0052849758102233154, 'init_value': -2.2155275344848633, 'ave_value': -1.3650136564489086, 'soft_opc': nan} step=4150




2022-04-20 17:32.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.31 [info     ] FQE_20220420173206: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016125713486269297, 'time_algorithm_update': 0.004965895629790892, 'loss': 0.007577781384552728, 'time_step': 0.0052003242883337545, 'init_value': -2.286715030670166, 'ave_value': -1.4194166480890802, 'soft_opc': nan} step=4316




2022-04-20 17:32.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.32 [info     ] FQE_20220420173206: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016030633305928795, 'time_algorithm_update': 0.004985132849360087, 'loss': 0.008118743762789092, 'time_step': 0.005217780549842191, 'init_value': -2.344867706298828, 'ave_value': -1.4500209286653747, 'soft_opc': nan} step=4482




2022-04-20 17:32.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.33 [info     ] FQE_20220420173206: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016394149826233647, 'time_algorithm_update': 0.005120350653866687, 'loss': 0.008606462326344013, 'time_step': 0.005363408341465226, 'init_value': -2.4371604919433594, 'ave_value': -1.5251771182033678, 'soft_opc': nan} step=4648




2022-04-20 17:32.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.34 [info     ] FQE_20220420173206: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016305532800145895, 'time_algorithm_update': 0.0049986623855958505, 'loss': 0.009144751319987139, 'time_step': 0.005235949194574931, 'init_value': -2.5566086769104004, 'ave_value': -1.642695998660549, 'soft_opc': nan} step=4814




2022-04-20 17:32.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.35 [info     ] FQE_20220420173206: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.000160872218120529, 'time_algorithm_update': 0.005093103431793581, 'loss': 0.009438176813650405, 'time_step': 0.005332544625523579, 'init_value': -2.560670852661133, 'ave_value': -1.6235129486328947, 'soft_opc': nan} step=4980




2022-04-20 17:32.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.36 [info     ] FQE_20220420173206: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001638625041548028, 'time_algorithm_update': 0.005058025739279138, 'loss': 0.010116828026261225, 'time_step': 0.00530024608933782, 'init_value': -2.7355971336364746, 'ave_value': -1.755562761598812, 'soft_opc': nan} step=5146




2022-04-20 17:32.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.36 [info     ] FQE_20220420173206: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001619910619345056, 'time_algorithm_update': 0.0048270383513117415, 'loss': 0.010659353984295425, 'time_step': 0.005060573658311224, 'init_value': -2.862304210662842, 'ave_value': -1.8556742825818056, 'soft_opc': nan} step=5312




2022-04-20 17:32.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.37 [info     ] FQE_20220420173206: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016254402068724115, 'time_algorithm_update': 0.004585995731583561, 'loss': 0.011151867133217403, 'time_step': 0.004823709108743323, 'init_value': -2.9315714836120605, 'ave_value': -1.9099478178968032, 'soft_opc': nan} step=5478




2022-04-20 17:32.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.38 [info     ] FQE_20220420173206: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016506608710231552, 'time_algorithm_update': 0.00519801335162427, 'loss': 0.0116155160028566, 'time_step': 0.005439120602894978, 'init_value': -2.988560199737549, 'ave_value': -1.9312905797449587, 'soft_opc': nan} step=5644




2022-04-20 17:32.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.39 [info     ] FQE_20220420173206: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001637662749692618, 'time_algorithm_update': 0.005037898040679564, 'loss': 0.012545870276902947, 'time_step': 0.005272127059568842, 'init_value': -3.133242607116699, 'ave_value': -2.038842463550401, 'soft_opc': nan} step=5810




2022-04-20 17:32.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.40 [info     ] FQE_20220420173206: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016495549535176842, 'time_algorithm_update': 0.00513387731758945, 'loss': 0.013399547704422555, 'time_step': 0.005372996789863311, 'init_value': -3.2045345306396484, 'ave_value': -2.06092738403859, 'soft_opc': nan} step=5976




2022-04-20 17:32.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.41 [info     ] FQE_20220420173206: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015953506331845937, 'time_algorithm_update': 0.005044487585504371, 'loss': 0.013518874133735087, 'time_step': 0.005278903317738728, 'init_value': -3.2131075859069824, 'ave_value': -2.0404562672067366, 'soft_opc': nan} step=6142




2022-04-20 17:32.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.42 [info     ] FQE_20220420173206: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016126144363219478, 'time_algorithm_update': 0.005001895398978728, 'loss': 0.013628293232745435, 'time_step': 0.005241504634719297, 'init_value': -3.2665295600891113, 'ave_value': -2.0902545916042414, 'soft_opc': nan} step=6308




2022-04-20 17:32.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.43 [info     ] FQE_20220420173206: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016912494797304453, 'time_algorithm_update': 0.004996302616165345, 'loss': 0.014141051142184759, 'time_step': 0.005241226000958179, 'init_value': -3.343146800994873, 'ave_value': -2.123342794261302, 'soft_opc': nan} step=6474




2022-04-20 17:32.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.44 [info     ] FQE_20220420173206: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016858922429831632, 'time_algorithm_update': 0.005107842296002859, 'loss': 0.014910268956018298, 'time_step': 0.00535318219518087, 'init_value': -3.472986936569214, 'ave_value': -2.2267293784538755, 'soft_opc': nan} step=6640




2022-04-20 17:32.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.45 [info     ] FQE_20220420173206: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016194653798298663, 'time_algorithm_update': 0.005052477480417274, 'loss': 0.015632975286192608, 'time_step': 0.005286233970917851, 'init_value': -3.5560755729675293, 'ave_value': -2.281674353719001, 'soft_opc': nan} step=6806




2022-04-20 17:32.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.46 [info     ] FQE_20220420173206: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015963560127350222, 'time_algorithm_update': 0.004370534276387778, 'loss': 0.016281120361288988, 'time_step': 0.004599094390869141, 'init_value': -3.6704859733581543, 'ave_value': -2.3951009364211346, 'soft_opc': nan} step=6972




2022-04-20 17:32.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.47 [info     ] FQE_20220420173206: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016332390796707337, 'time_algorithm_update': 0.005004874194960996, 'loss': 0.01646039463896641, 'time_step': 0.005240372864596815, 'init_value': -3.7006688117980957, 'ave_value': -2.3915118698234177, 'soft_opc': nan} step=7138




2022-04-20 17:32.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.48 [info     ] FQE_20220420173206: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001615170972893037, 'time_algorithm_update': 0.005016777888838067, 'loss': 0.016701959787464016, 'time_step': 0.0052469423018306136, 'init_value': -3.719508171081543, 'ave_value': -2.42394951941479, 'soft_opc': nan} step=7304




2022-04-20 17:32.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.49 [info     ] FQE_20220420173206: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016283558075686535, 'time_algorithm_update': 0.005028088408780385, 'loss': 0.01669242474459482, 'time_step': 0.005260918513838067, 'init_value': -3.803574800491333, 'ave_value': -2.4943791399756026, 'soft_opc': nan} step=7470




2022-04-20 17:32.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.50 [info     ] FQE_20220420173206: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016406358006488845, 'time_algorithm_update': 0.005147737192820354, 'loss': 0.017873562909578568, 'time_step': 0.005387541759445007, 'init_value': -3.80342960357666, 'ave_value': -2.4514817694046847, 'soft_opc': nan} step=7636




2022-04-20 17:32.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.51 [info     ] FQE_20220420173206: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016264599489878458, 'time_algorithm_update': 0.004996256655957325, 'loss': 0.018677555020416087, 'time_step': 0.005235692104661321, 'init_value': -3.8937249183654785, 'ave_value': -2.518651339106119, 'soft_opc': nan} step=7802




2022-04-20 17:32.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.52 [info     ] FQE_20220420173206: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016073577375297086, 'time_algorithm_update': 0.00511425230876509, 'loss': 0.018492360040054556, 'time_step': 0.005348824592958014, 'init_value': -4.018930435180664, 'ave_value': -2.6035756167238326, 'soft_opc': nan} step=7968




2022-04-20 17:32.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.53 [info     ] FQE_20220420173206: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016435370387801206, 'time_algorithm_update': 0.005005167191287121, 'loss': 0.020089537152145284, 'time_step': 0.005243498158742146, 'init_value': -4.194572448730469, 'ave_value': -2.7415571114782513, 'soft_opc': nan} step=8134




2022-04-20 17:32.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:32.54 [info     ] FQE_20220420173206: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001610072262315865, 'time_algorithm_update': 0.005013643977153732, 'loss': 0.021374400000996798, 'time_step': 0.005246071930391243, 'init_value': -4.240593433380127, 'ave_value': -2.7527990527012647, 'soft_opc': nan} step=8300




2022-04-20 17:32.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173206/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

start
[ 0.00000000e+00  7.95731469e+08  5.32108923e-02 -3.61999953e-02
 -7.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 239 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.41110892e-01  3.18000047e-02
  1.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  5.97482117e-01]
Read chunk # 240 out of 4999
torch.Size([44400, 6])
2022-04-20 17:32.54 [debug    ] RoundIterator is selected.
2022-04-20 17:32.54 [info     ] Directory is created at d3rlpy_logs/FQE_20220420173254
2022-04-20 17:32.54 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:32.54 [debug    ] Building models...
2022-04-20 17:32.54 [debug    ] Models have been built.
2022-04-20 17:32.54 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420173254/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batc

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:32.56 [info     ] FQE_20220420173254: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016990581028897996, 'time_algorithm_update': 0.00502899062465614, 'loss': 0.029298432743255522, 'time_step': 0.005273708155457402, 'init_value': -0.8892775774002075, 'ave_value': -0.8945140416787858, 'soft_opc': nan} step=355




2022-04-20 17:32.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:32.58 [info     ] FQE_20220420173254: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00017363924375722107, 'time_algorithm_update': 0.004983261269582829, 'loss': 0.026040812214495432, 'time_step': 0.00523049126208668, 'init_value': -1.898688554763794, 'ave_value': -1.8854901433451294, 'soft_opc': nan} step=710




2022-04-20 17:32.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.00 [info     ] FQE_20220420173254: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00017352775788642992, 'time_algorithm_update': 0.004986241837622414, 'loss': 0.027083880960626503, 'time_step': 0.005233389223125619, 'init_value': -2.5440165996551514, 'ave_value': -2.4300981827645995, 'soft_opc': nan} step=1065




2022-04-20 17:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.02 [info     ] FQE_20220420173254: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00017479842817279655, 'time_algorithm_update': 0.004928069719126527, 'loss': 0.03237758444123705, 'time_step': 0.005176884019878549, 'init_value': -3.5137264728546143, 'ave_value': -3.3055168370098986, 'soft_opc': nan} step=1420




2022-04-20 17:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.04 [info     ] FQE_20220420173254: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016879296638596227, 'time_algorithm_update': 0.0048003344468667474, 'loss': 0.03817816213507887, 'time_step': 0.005042259458085181, 'init_value': -4.170973777770996, 'ave_value': -3.8602333740435513, 'soft_opc': nan} step=1775




2022-04-20 17:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.06 [info     ] FQE_20220420173254: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017101260977731624, 'time_algorithm_update': 0.0050300316071846115, 'loss': 0.049716707833216224, 'time_step': 0.00527372763190471, 'init_value': -5.083244323730469, 'ave_value': -4.626194028962916, 'soft_opc': nan} step=2130




2022-04-20 17:33.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.08 [info     ] FQE_20220420173254: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017425778885962257, 'time_algorithm_update': 0.00504385316875619, 'loss': 0.06021063296570325, 'time_step': 0.005290902500421229, 'init_value': -5.5251593589782715, 'ave_value': -4.983601443787396, 'soft_opc': nan} step=2485




2022-04-20 17:33.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.10 [info     ] FQE_20220420173254: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00017484073907556667, 'time_algorithm_update': 0.0049678681601940744, 'loss': 0.07329112987860407, 'time_step': 0.005218032380224953, 'init_value': -6.30993127822876, 'ave_value': -5.618027699149023, 'soft_opc': nan} step=2840




2022-04-20 17:33.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.12 [info     ] FQE_20220420173254: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00017022079145404654, 'time_algorithm_update': 0.004662061073410679, 'loss': 0.08907925052122331, 'time_step': 0.004906310497874945, 'init_value': -6.959505558013916, 'ave_value': -6.248761989812018, 'soft_opc': nan} step=3195




2022-04-20 17:33.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.14 [info     ] FQE_20220420173254: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.0001711939422177597, 'time_algorithm_update': 0.005030265996153926, 'loss': 0.10592911570319827, 'time_step': 0.005273826357344506, 'init_value': -7.727020263671875, 'ave_value': -6.936383940716738, 'soft_opc': nan} step=3550




2022-04-20 17:33.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.16 [info     ] FQE_20220420173254: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.000171562651513328, 'time_algorithm_update': 0.0050106444828946824, 'loss': 0.12098647171867566, 'time_step': 0.005254131639507455, 'init_value': -8.30721664428711, 'ave_value': -7.423923269061892, 'soft_opc': nan} step=3905




2022-04-20 17:33.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.18 [info     ] FQE_20220420173254: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.0001714478076343805, 'time_algorithm_update': 0.005046939849853516, 'loss': 0.1442763946373278, 'time_step': 0.0052894672877352, 'init_value': -8.618780136108398, 'ave_value': -7.648083376978431, 'soft_opc': nan} step=4260




2022-04-20 17:33.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.20 [info     ] FQE_20220420173254: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017204754789110641, 'time_algorithm_update': 0.004742467235511458, 'loss': 0.1600786284976442, 'time_step': 0.004988771760967416, 'init_value': -9.222376823425293, 'ave_value': -8.23537459500623, 'soft_opc': nan} step=4615




2022-04-20 17:33.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.22 [info     ] FQE_20220420173254: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00017159287358673526, 'time_algorithm_update': 0.005029096737713881, 'loss': 0.1793210946128402, 'time_step': 0.005274587281992738, 'init_value': -9.727783203125, 'ave_value': -8.640263609114454, 'soft_opc': nan} step=4970




2022-04-20 17:33.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.24 [info     ] FQE_20220420173254: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001736426017653774, 'time_algorithm_update': 0.0050146640186578455, 'loss': 0.20365178271813292, 'time_step': 0.0052641123113497884, 'init_value': -10.363961219787598, 'ave_value': -9.168037396662676, 'soft_opc': nan} step=5325




2022-04-20 17:33.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.26 [info     ] FQE_20220420173254: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017238066230021733, 'time_algorithm_update': 0.005039225161915094, 'loss': 0.22610019759707886, 'time_step': 0.00528703877623652, 'init_value': -10.957977294921875, 'ave_value': -9.723738726049643, 'soft_opc': nan} step=5680




2022-04-20 17:33.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.28 [info     ] FQE_20220420173254: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00017016303371375716, 'time_algorithm_update': 0.005049738413851026, 'loss': 0.2547921650636364, 'time_step': 0.005293546596043547, 'init_value': -11.536919593811035, 'ave_value': -10.245265447475889, 'soft_opc': nan} step=6035




2022-04-20 17:33.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.30 [info     ] FQE_20220420173254: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001691039179412412, 'time_algorithm_update': 0.004649234153855015, 'loss': 0.2723368672365454, 'time_step': 0.004892429835359815, 'init_value': -11.842910766601562, 'ave_value': -10.453307106445617, 'soft_opc': nan} step=6390




2022-04-20 17:33.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.32 [info     ] FQE_20220420173254: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00017926256421586158, 'time_algorithm_update': 0.005042716147194446, 'loss': 0.3011345399307533, 'time_step': 0.005299717943433305, 'init_value': -12.57198429107666, 'ave_value': -10.994349185425783, 'soft_opc': nan} step=6745




2022-04-20 17:33.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.34 [info     ] FQE_20220420173254: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017441225723481514, 'time_algorithm_update': 0.005032552128106775, 'loss': 0.3271736742640045, 'time_step': 0.005282372488102443, 'init_value': -13.159896850585938, 'ave_value': -11.32491997025207, 'soft_opc': nan} step=7100




2022-04-20 17:33.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.36 [info     ] FQE_20220420173254: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00017379572693730744, 'time_algorithm_update': 0.005084437383732325, 'loss': 0.3616897484628667, 'time_step': 0.00533221808957382, 'init_value': -13.648630142211914, 'ave_value': -11.638763513691254, 'soft_opc': nan} step=7455




2022-04-20 17:33.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.38 [info     ] FQE_20220420173254: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00017289981036119057, 'time_algorithm_update': 0.004632085477802116, 'loss': 0.3853036723453814, 'time_step': 0.004878866168814645, 'init_value': -13.983990669250488, 'ave_value': -11.878354452590685, 'soft_opc': nan} step=7810




2022-04-20 17:33.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.40 [info     ] FQE_20220420173254: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001747326112129319, 'time_algorithm_update': 0.005077536005369375, 'loss': 0.4142925922562119, 'time_step': 0.00532814213927363, 'init_value': -14.491040229797363, 'ave_value': -12.144758729927213, 'soft_opc': nan} step=8165




2022-04-20 17:33.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.42 [info     ] FQE_20220420173254: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.0001759602989948971, 'time_algorithm_update': 0.005032649510343309, 'loss': 0.43892644779677964, 'time_step': 0.005285584758704817, 'init_value': -15.155671119689941, 'ave_value': -12.481478953626166, 'soft_opc': nan} step=8520




2022-04-20 17:33.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.44 [info     ] FQE_20220420173254: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017560972294337313, 'time_algorithm_update': 0.005054210609113667, 'loss': 0.46071444055668903, 'time_step': 0.0053069705694494115, 'init_value': -15.613008499145508, 'ave_value': -12.675349966330188, 'soft_opc': nan} step=8875




2022-04-20 17:33.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.46 [info     ] FQE_20220420173254: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017419868791607065, 'time_algorithm_update': 0.005119609161162041, 'loss': 0.478769409173811, 'time_step': 0.0053704214767670965, 'init_value': -16.547945022583008, 'ave_value': -13.19193988400901, 'soft_opc': nan} step=9230




2022-04-20 17:33.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.48 [info     ] FQE_20220420173254: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001720932168020329, 'time_algorithm_update': 0.004677910200307067, 'loss': 0.512191804550903, 'time_step': 0.004923821838808731, 'init_value': -17.326520919799805, 'ave_value': -13.681216586428183, 'soft_opc': nan} step=9585




2022-04-20 17:33.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.50 [info     ] FQE_20220420173254: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001744444941131162, 'time_algorithm_update': 0.00504658860220036, 'loss': 0.5424853320167937, 'time_step': 0.005297199437316035, 'init_value': -17.987455368041992, 'ave_value': -14.000587955962487, 'soft_opc': nan} step=9940




2022-04-20 17:33.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.52 [info     ] FQE_20220420173254: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017452777271539393, 'time_algorithm_update': 0.004996180198561977, 'loss': 0.5647417182758661, 'time_step': 0.005244813166873556, 'init_value': -18.768098831176758, 'ave_value': -14.58908778105262, 'soft_opc': nan} step=10295




2022-04-20 17:33.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.54 [info     ] FQE_20220420173254: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017317113742022446, 'time_algorithm_update': 0.005047599362655425, 'loss': 0.5869342556406915, 'time_step': 0.005294132904267647, 'init_value': -18.885568618774414, 'ave_value': -14.360677421035932, 'soft_opc': nan} step=10650




2022-04-20 17:33.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.56 [info     ] FQE_20220420173254: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.0001704880889032928, 'time_algorithm_update': 0.004584446759291099, 'loss': 0.6033769148126454, 'time_step': 0.004829687467763122, 'init_value': -19.72109603881836, 'ave_value': -14.768889854054423, 'soft_opc': nan} step=11005




2022-04-20 17:33.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:33.58 [info     ] FQE_20220420173254: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017334373903946137, 'time_algorithm_update': 0.005103758019460758, 'loss': 0.6363387888166266, 'time_step': 0.0053529101358333105, 'init_value': -20.371118545532227, 'ave_value': -14.927387518611194, 'soft_opc': nan} step=11360




2022-04-20 17:33.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.00 [info     ] FQE_20220420173254: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017602880236128687, 'time_algorithm_update': 0.005070195399539571, 'loss': 0.673412378615057, 'time_step': 0.005321485223904462, 'init_value': -21.343420028686523, 'ave_value': -15.600663704869064, 'soft_opc': nan} step=11715




2022-04-20 17:34.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.02 [info     ] FQE_20220420173254: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017529205537178148, 'time_algorithm_update': 0.005041235265597491, 'loss': 0.6978012041726582, 'time_step': 0.005290191945895343, 'init_value': -22.008106231689453, 'ave_value': -15.950292749663616, 'soft_opc': nan} step=12070




2022-04-20 17:34.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.04 [info     ] FQE_20220420173254: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017438337836467045, 'time_algorithm_update': 0.004701296712311221, 'loss': 0.7162443291658247, 'time_step': 0.004953354848942286, 'init_value': -22.32464027404785, 'ave_value': -15.91758435310477, 'soft_opc': nan} step=12425




2022-04-20 17:34.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.06 [info     ] FQE_20220420173254: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017235648464149152, 'time_algorithm_update': 0.005027638018970758, 'loss': 0.7100982771792882, 'time_step': 0.0052741319360867355, 'init_value': -22.63062858581543, 'ave_value': -16.1728833872403, 'soft_opc': nan} step=12780




2022-04-20 17:34.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.08 [info     ] FQE_20220420173254: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001682113593732807, 'time_algorithm_update': 0.00504852617290658, 'loss': 0.7332492239878211, 'time_step': 0.005292020717137296, 'init_value': -23.30514144897461, 'ave_value': -16.526797818721416, 'soft_opc': nan} step=13135




2022-04-20 17:34.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.10 [info     ] FQE_20220420173254: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001722275371282873, 'time_algorithm_update': 0.005000362261919908, 'loss': 0.7493117002376788, 'time_step': 0.005248320270592058, 'init_value': -23.628393173217773, 'ave_value': -16.69127881952023, 'soft_opc': nan} step=13490




2022-04-20 17:34.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.12 [info     ] FQE_20220420173254: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017457008361816407, 'time_algorithm_update': 0.00511966691890233, 'loss': 0.7703417799422438, 'time_step': 0.0053694704888572154, 'init_value': -23.971302032470703, 'ave_value': -16.87688523702026, 'soft_opc': nan} step=13845




2022-04-20 17:34.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.13 [info     ] FQE_20220420173254: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.0001682617294956261, 'time_algorithm_update': 0.003538269392201598, 'loss': 0.7753680383655387, 'time_step': 0.003776952582345882, 'init_value': -24.07564926147461, 'ave_value': -16.75365616009119, 'soft_opc': nan} step=14200




2022-04-20 17:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.15 [info     ] FQE_20220420173254: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016734499326893982, 'time_algorithm_update': 0.0034474399727834784, 'loss': 0.7955363221361603, 'time_step': 0.003686111074098399, 'init_value': -24.3732852935791, 'ave_value': -16.801360190527912, 'soft_opc': nan} step=14555




2022-04-20 17:34.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.16 [info     ] FQE_20220420173254: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016901190851775693, 'time_algorithm_update': 0.003569653336430939, 'loss': 0.8027821102350111, 'time_step': 0.003810311706972794, 'init_value': -24.81588363647461, 'ave_value': -17.229720535013282, 'soft_opc': nan} step=14910




2022-04-20 17:34.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.18 [info     ] FQE_20220420173254: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.0001690508614123707, 'time_algorithm_update': 0.0034681199302136058, 'loss': 0.8035816189111539, 'time_step': 0.003710365295410156, 'init_value': -24.951343536376953, 'ave_value': -17.311713221678787, 'soft_opc': nan} step=15265




2022-04-20 17:34.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.19 [info     ] FQE_20220420173254: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017102268380178533, 'time_algorithm_update': 0.0035175343634377063, 'loss': 0.8021607522865836, 'time_step': 0.003763705911770673, 'init_value': -24.8620548248291, 'ave_value': -17.202970704373975, 'soft_opc': nan} step=15620




2022-04-20 17:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.20 [info     ] FQE_20220420173254: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016693195826570753, 'time_algorithm_update': 0.0034447811019252725, 'loss': 0.8291675628385913, 'time_step': 0.0036838162113243425, 'init_value': -24.91341209411621, 'ave_value': -17.3831290388161, 'soft_opc': nan} step=15975




2022-04-20 17:34.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.22 [info     ] FQE_20220420173254: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00016841351146429357, 'time_algorithm_update': 0.0035162724239725463, 'loss': 0.8247928752235963, 'time_step': 0.003754842785042776, 'init_value': -25.325193405151367, 'ave_value': -18.220289666863444, 'soft_opc': nan} step=16330




2022-04-20 17:34.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.23 [info     ] FQE_20220420173254: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016884803771972657, 'time_algorithm_update': 0.003490332482566296, 'loss': 0.8321167102686956, 'time_step': 0.0037332440765810685, 'init_value': -25.038143157958984, 'ave_value': -18.370598899499733, 'soft_opc': nan} step=16685




2022-04-20 17:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.25 [info     ] FQE_20220420173254: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.000167244253024249, 'time_algorithm_update': 0.0034823733316340917, 'loss': 0.8268775808979089, 'time_step': 0.0037228275352800396, 'init_value': -24.934524536132812, 'ave_value': -18.731950297712935, 'soft_opc': nan} step=17040




2022-04-20 17:34.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.26 [info     ] FQE_20220420173254: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016944106196013976, 'time_algorithm_update': 0.00342349133021395, 'loss': 0.8232985292626939, 'time_step': 0.0036667501422720894, 'init_value': -24.49384117126465, 'ave_value': -18.90887366244222, 'soft_opc': nan} step=17395




2022-04-20 17:34.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:34.28 [info     ] FQE_20220420173254: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016556592054770027, 'time_algorithm_update': 0.003427681452791456, 'loss': 0.804242266478463, 'time_step': 0.0036683505689594107, 'init_value': -24.18825912475586, 'ave_value': -19.048182949434818, 'soft_opc': nan} step=17750




2022-04-20 17:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173254/model_17750.pt
search iteration:  25
using hyper params:  [0.009339115758230683, 0.0011312181152706919, 2.9964011475234506e-05, 3]
2022-04-20 17:34.28 [debug    ] RoundIterator is selected.
2022-04-20 17:34.28 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420173428
2022-04-20 17:34.28 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:34.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:34.28 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:34.28 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009339115758

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.31 [info     ] TD3PlusBC_20220420173428: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00034562816396791336, 'time_algorithm_update': 0.006750145153692592, 'critic_loss': 6.178830587375931, 'actor_loss': 2.4974218390838443, 'time_step': 0.007174783979940136, 'td_error': 0.8273345600991416, 'init_value': -4.295762062072754, 'ave_value': -2.424003513337633} step=342
2022-04-20 17:34.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.34 [info     ] TD3PlusBC_20220420173428: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033846793816103574, 'time_algorithm_update': 0.0067512849618119805, 'critic_loss': 1.33285169895978, 'actor_loss': 2.342851684804548, 'time_step': 0.007161245011446769, 'td_error': 0.861405668851457, 'init_value': -6.2688775062561035, 'ave_value': -3.5984811145664812} step=684
2022-04-20 17:34.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.36 [info     ] TD3PlusBC_20220420173428: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00034270091363561083, 'time_algorithm_update': 0.00672085103932877, 'critic_loss': 1.717701995146205, 'actor_loss': 2.325838829341688, 'time_step': 0.007138342188115706, 'td_error': 0.9332826300881876, 'init_value': -8.257718086242676, 'ave_value': -4.711679152999753} step=1026
2022-04-20 17:34.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.39 [info     ] TD3PlusBC_20220420173428: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003389440781888906, 'time_algorithm_update': 0.006708950327153792, 'critic_loss': 2.1830995549061143, 'actor_loss': 2.31398802472834, 'time_step': 0.00712235629209998, 'td_error': 1.0296741979285715, 'init_value': -10.424978256225586, 'ave_value': -5.978664199813722} step=1368
2022-04-20 17:34.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.42 [info     ] TD3PlusBC_20220420173428: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00034035297862270423, 'time_algorithm_update': 0.0066927838743778695, 'critic_loss': 2.741626051957147, 'actor_loss': 2.3151287126262288, 'time_step': 0.007111076025934944, 'td_error': 1.1702790728072854, 'init_value': -12.604220390319824, 'ave_value': -7.240658467300069} step=1710
2022-04-20 17:34.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.45 [info     ] TD3PlusBC_20220420173428: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00034724759776689855, 'time_algorithm_update': 0.0067747050558614455, 'critic_loss': 3.284153587114044, 'actor_loss': 2.304769235744811, 'time_step': 0.007198839159736856, 'td_error': 1.329999261753073, 'init_value': -14.759458541870117, 'ave_value': -8.443380707875901} step=2052
2022-04-20 17:34.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.48 [info     ] TD3PlusBC_20220420173428: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00034333878790425974, 'time_algorithm_update': 0.00674786274893242, 'critic_loss': 3.8323000662508067, 'actor_loss': 2.3076394942768834, 'time_step': 0.007166200213962131, 'td_error': 1.5200625305959883, 'init_value': -17.00102996826172, 'ave_value': -9.739936532059073} step=2394
2022-04-20 17:34.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.51 [info     ] TD3PlusBC_20220420173428: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003442492401390745, 'time_algorithm_update': 0.006789037358691121, 'critic_loss': 4.4269036486832025, 'actor_loss': 2.3091305598878025, 'time_step': 0.007209883098713836, 'td_error': 1.7548671899612214, 'init_value': -19.268224716186523, 'ave_value': -11.040742212544334} step=2736
2022-04-20 17:34.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.53 [info     ] TD3PlusBC_20220420173428: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034053074686150804, 'time_algorithm_update': 0.006197396077607807, 'critic_loss': 5.062517621712378, 'actor_loss': 2.3082872677964774, 'time_step': 0.006615089394195735, 'td_error': 1.9877971696200847, 'init_value': -21.27260971069336, 'ave_value': -12.164833625221377} step=3078
2022-04-20 17:34.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.56 [info     ] TD3PlusBC_20220420173428: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003451596923738892, 'time_algorithm_update': 0.006785781759964793, 'critic_loss': 5.675529448086755, 'actor_loss': 2.307398754253722, 'time_step': 0.007210537006980495, 'td_error': 2.2590866973135797, 'init_value': -23.380043029785156, 'ave_value': -13.383372284913717} step=3420
2022-04-20 17:34.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.59 [info     ] TD3PlusBC_20220420173428: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00034356953804953055, 'time_algorithm_update': 0.006689739506147061, 'critic_loss': 6.3497063496656585, 'actor_loss': 2.3090770495565316, 'time_step': 0.00710944055813795, 'td_error': 2.532269497991796, 'init_value': -25.401941299438477, 'ave_value': -14.561774937460317} step=3762
2022-04-20 17:34.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.02 [info     ] TD3PlusBC_20220420173428: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00034358348065649557, 'time_algorithm_update': 0.006704957861649363, 'critic_loss': 6.958599403587698, 'actor_loss': 2.3038004225457622, 'time_step': 0.007127188102543702, 'td_error': 2.814383160598512, 'init_value': -27.254846572875977, 'ave_value': -15.595886800455366} step=4104
2022-04-20 17:35.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.05 [info     ] TD3PlusBC_20220420173428: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034282360857690287, 'time_algorithm_update': 0.006768061403642621, 'critic_loss': 7.6615150874121145, 'actor_loss': 2.3055351812239975, 'time_step': 0.0071878028891937075, 'td_error': 3.1389107954311273, 'init_value': -29.316492080688477, 'ave_value': -16.74912695538324} step=4446
2022-04-20 17:35.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.08 [info     ] TD3PlusBC_20220420173428: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00034062137380678055, 'time_algorithm_update': 0.006758981280856662, 'critic_loss': 8.410343467840674, 'actor_loss': 2.305154179969029, 'time_step': 0.007177602477938111, 'td_error': 3.4467464409419577, 'init_value': -31.138538360595703, 'ave_value': -17.79462351506874} step=4788
2022-04-20 17:35.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.10 [info     ] TD3PlusBC_20220420173428: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000347134662650482, 'time_algorithm_update': 0.006783486806858353, 'critic_loss': 9.101912583872588, 'actor_loss': 2.30391828497948, 'time_step': 0.007209608429356625, 'td_error': 3.748579803366114, 'init_value': -33.002498626708984, 'ave_value': -18.85803731437139} step=5130
2022-04-20 17:35.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.13 [info     ] TD3PlusBC_20220420173428: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003389510494923731, 'time_algorithm_update': 0.006656561678613139, 'critic_loss': 9.963105535646628, 'actor_loss': 2.3042086662604793, 'time_step': 0.0070732016312448605, 'td_error': 4.108040977348067, 'init_value': -34.836090087890625, 'ave_value': -19.870413795248353} step=5472
2022-04-20 17:35.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.16 [info     ] TD3PlusBC_20220420173428: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00034145862735502903, 'time_algorithm_update': 0.006751761798970184, 'critic_loss': 10.823663298149555, 'actor_loss': 2.307247563412315, 'time_step': 0.007166421204282527, 'td_error': 4.413186829921853, 'init_value': -36.5423469543457, 'ave_value': -20.812863341560547} step=5814
2022-04-20 17:35.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.19 [info     ] TD3PlusBC_20220420173428: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00034079844491523607, 'time_algorithm_update': 0.006750193952816968, 'critic_loss': 11.743599688797666, 'actor_loss': 2.3099835950728744, 'time_step': 0.0071629125472397834, 'td_error': 4.752265855731075, 'init_value': -38.31732940673828, 'ave_value': -21.892468814016866} step=6156
2022-04-20 17:35.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.22 [info     ] TD3PlusBC_20220420173428: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00034302298785650245, 'time_algorithm_update': 0.006747418676900585, 'critic_loss': 12.694344268904793, 'actor_loss': 2.3054181637122615, 'time_step': 0.0071612227032756244, 'td_error': 5.069363783838403, 'init_value': -39.96831512451172, 'ave_value': -22.76560633979398} step=6498
2022-04-20 17:35.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.25 [info     ] TD3PlusBC_20220420173428: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034606456756591797, 'time_algorithm_update': 0.006860956811068351, 'critic_loss': 13.7402629210935, 'actor_loss': 2.3027765918196295, 'time_step': 0.007276350294637401, 'td_error': 5.4104355028881095, 'init_value': -41.5443115234375, 'ave_value': -23.64485557468176} step=6840
2022-04-20 17:35.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.28 [info     ] TD3PlusBC_20220420173428: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003438909151400739, 'time_algorithm_update': 0.006755613444144265, 'critic_loss': 14.956555797342668, 'actor_loss': 2.307964663756521, 'time_step': 0.007176232616803799, 'td_error': 5.728735366535214, 'init_value': -43.20527267456055, 'ave_value': -24.53173015672655} step=7182
2022-04-20 17:35.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.31 [info     ] TD3PlusBC_20220420173428: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003435507155301278, 'time_algorithm_update': 0.0066758882232576785, 'critic_loss': 16.113335334766678, 'actor_loss': 2.3060245416317766, 'time_step': 0.007092627168398852, 'td_error': 6.031748290072614, 'init_value': -45.087791442871094, 'ave_value': -25.53815006676541} step=7524
2022-04-20 17:35.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.33 [info     ] TD3PlusBC_20220420173428: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003412160259938379, 'time_algorithm_update': 0.006733738190946523, 'critic_loss': 17.438332103149236, 'actor_loss': 2.306814720756129, 'time_step': 0.007145616046169348, 'td_error': 6.374779242793435, 'init_value': -46.50153350830078, 'ave_value': -26.37158421275013} step=7866
2022-04-20 17:35.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.36 [info     ] TD3PlusBC_20220420173428: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003395742840237088, 'time_algorithm_update': 0.006756279900757193, 'critic_loss': 18.8390530759131, 'actor_loss': 2.3068921775148628, 'time_step': 0.007166427478455661, 'td_error': 6.712749179154972, 'init_value': -48.37852096557617, 'ave_value': -27.34089519709634} step=8208
2022-04-20 17:35.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.39 [info     ] TD3PlusBC_20220420173428: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003405851230286715, 'time_algorithm_update': 0.0067967629572104294, 'critic_loss': 20.417155299270362, 'actor_loss': 2.3083830409579806, 'time_step': 0.00720712037114372, 'td_error': 7.002082928547238, 'init_value': -49.6806755065918, 'ave_value': -28.135945612153936} step=8550
2022-04-20 17:35.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.42 [info     ] TD3PlusBC_20220420173428: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00033684571584065753, 'time_algorithm_update': 0.006744013195149383, 'critic_loss': 21.94235192404853, 'actor_loss': 2.3084157391598352, 'time_step': 0.0071503167961075986, 'td_error': 7.313258916482569, 'init_value': -51.16337203979492, 'ave_value': -28.894559472690982} step=8892
2022-04-20 17:35.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.45 [info     ] TD3PlusBC_20220420173428: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003472078613370483, 'time_algorithm_update': 0.006800453565274066, 'critic_loss': 23.5764319436592, 'actor_loss': 2.3072531139641477, 'time_step': 0.007223663971438045, 'td_error': 7.618240144042561, 'init_value': -52.68833541870117, 'ave_value': -29.793420294723475} step=9234
2022-04-20 17:35.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.48 [info     ] TD3PlusBC_20220420173428: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003454824637251291, 'time_algorithm_update': 0.00673650440416838, 'critic_loss': 25.24391762136716, 'actor_loss': 2.3130809466044107, 'time_step': 0.007154881605627941, 'td_error': 7.916548088102461, 'init_value': -54.2642936706543, 'ave_value': -30.574200486419723} step=9576
2022-04-20 17:35.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.51 [info     ] TD3PlusBC_20220420173428: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034175769627442835, 'time_algorithm_update': 0.0067897825910334005, 'critic_loss': 27.040213356241146, 'actor_loss': 2.3110461555726345, 'time_step': 0.0072044008656551965, 'td_error': 8.164198410365746, 'init_value': -55.3951530456543, 'ave_value': -31.14502039393754} step=9918
2022-04-20 17:35.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.53 [info     ] TD3PlusBC_20220420173428: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003400525154426084, 'time_algorithm_update': 0.006743797781871773, 'critic_loss': 28.9632193219592, 'actor_loss': 2.3130932370124504, 'time_step': 0.007155189040111519, 'td_error': 8.472657938159312, 'init_value': -57.002845764160156, 'ave_value': -32.02841232936922} step=10260
2022-04-20 17:35.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.56 [info     ] TD3PlusBC_20220420173428: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034392368026644164, 'time_algorithm_update': 0.00683365858089157, 'critic_loss': 30.836147913458753, 'actor_loss': 2.309025553931967, 'time_step': 0.007250085908767075, 'td_error': 8.747688417774766, 'init_value': -58.325965881347656, 'ave_value': -32.73708327425497} step=10602
2022-04-20 17:35.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:35.59 [info     ] TD3PlusBC_20220420173428: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00034476860224852087, 'time_algorithm_update': 0.006779810141401681, 'critic_loss': 32.84732888595403, 'actor_loss': 2.3082513976515386, 'time_step': 0.007200149764791566, 'td_error': 8.988852644168935, 'init_value': -59.733436584472656, 'ave_value': -33.4479054257246} step=10944
2022-04-20 17:35.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.02 [info     ] TD3PlusBC_20220420173428: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00033982803947047184, 'time_algorithm_update': 0.006684858896578961, 'critic_loss': 34.61824184551573, 'actor_loss': 2.307836804473609, 'time_step': 0.007101572745027598, 'td_error': 9.22398437394317, 'init_value': -60.966590881347656, 'ave_value': -34.10792663702414} step=11286
2022-04-20 17:36.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.05 [info     ] TD3PlusBC_20220420173428: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00033965724253515054, 'time_algorithm_update': 0.006765533608999866, 'critic_loss': 36.641210243715875, 'actor_loss': 2.309956975847657, 'time_step': 0.007177994962324176, 'td_error': 9.481422828792002, 'init_value': -62.351219177246094, 'ave_value': -34.780621792208066} step=11628
2022-04-20 17:36.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.08 [info     ] TD3PlusBC_20220420173428: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00034624512432611474, 'time_algorithm_update': 0.0067702357531988135, 'critic_loss': 38.84398428041335, 'actor_loss': 2.3114530552200407, 'time_step': 0.007187623726694207, 'td_error': 9.770825375238429, 'init_value': -63.24354934692383, 'ave_value': -35.41135612828842} step=11970
2022-04-20 17:36.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.11 [info     ] TD3PlusBC_20220420173428: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00034491011970921566, 'time_algorithm_update': 0.006841122755530285, 'critic_loss': 40.80963599890993, 'actor_loss': 2.309915534236975, 'time_step': 0.00725966169123064, 'td_error': 9.964425963058774, 'init_value': -64.42677307128906, 'ave_value': -36.00790666018294} step=12312
2022-04-20 17:36.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.14 [info     ] TD3PlusBC_20220420173428: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003457355220415439, 'time_algorithm_update': 0.006766146386575978, 'critic_loss': 43.033024654053804, 'actor_loss': 2.312394967553211, 'time_step': 0.007185662001894231, 'td_error': 10.24582063550126, 'init_value': -65.92212677001953, 'ave_value': -36.81237255822884} step=12654
2022-04-20 17:36.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.16 [info     ] TD3PlusBC_20220420173428: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00034674357252511363, 'time_algorithm_update': 0.0068021678088004125, 'critic_loss': 45.24264766180027, 'actor_loss': 2.3139024268813997, 'time_step': 0.0072242342240629144, 'td_error': 10.420324099229678, 'init_value': -66.64990234375, 'ave_value': -37.19473463126676} step=12996
2022-04-20 17:36.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.20 [info     ] TD3PlusBC_20220420173428: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003523303751359906, 'time_algorithm_update': 0.008877687286912348, 'critic_loss': 47.507659131323386, 'actor_loss': 2.313809210794014, 'time_step': 0.009304449572200663, 'td_error': 10.686672975577165, 'init_value': -68.2527847290039, 'ave_value': -37.91081803108591} step=13338
2022-04-20 17:36.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.24 [info     ] TD3PlusBC_20220420173428: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00034902737154598123, 'time_algorithm_update': 0.008867972078379135, 'critic_loss': 49.73911815777159, 'actor_loss': 2.318087021509806, 'time_step': 0.009290548095926207, 'td_error': 10.928236903139844, 'init_value': -69.27091217041016, 'ave_value': -38.566921280002525} step=13680
2022-04-20 17:36.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.27 [info     ] TD3PlusBC_20220420173428: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00034680840564750093, 'time_algorithm_update': 0.008547509622852705, 'critic_loss': 52.069634197748194, 'actor_loss': 2.3183707842352796, 'time_step': 0.008972521413836563, 'td_error': 11.138322657205448, 'init_value': -70.0054931640625, 'ave_value': -39.03339953576928} step=14022
2022-04-20 17:36.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.31 [info     ] TD3PlusBC_20220420173428: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00034439842603360005, 'time_algorithm_update': 0.008960475698549148, 'critic_loss': 54.44088319031118, 'actor_loss': 2.3140854835510254, 'time_step': 0.00938085993828132, 'td_error': 11.290721472747633, 'init_value': -70.79576110839844, 'ave_value': -39.58459928059204} step=14364
2022-04-20 17:36.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.35 [info     ] TD3PlusBC_20220420173428: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035226554201360337, 'time_algorithm_update': 0.008837196562025283, 'critic_loss': 56.779824597096585, 'actor_loss': 2.3169196614047936, 'time_step': 0.009267229085777238, 'td_error': 11.558378723244335, 'init_value': -71.70391845703125, 'ave_value': -40.03806515946456} step=14706
2022-04-20 17:36.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.38 [info     ] TD3PlusBC_20220420173428: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003488126553987202, 'time_algorithm_update': 0.009002758048431219, 'critic_loss': 59.32485631753129, 'actor_loss': 2.3162290603793854, 'time_step': 0.009425108195745458, 'td_error': 11.645368034750877, 'init_value': -72.50321197509766, 'ave_value': -40.40142487730604} step=15048
2022-04-20 17:36.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.42 [info     ] TD3PlusBC_20220420173428: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.000333890580294425, 'time_algorithm_update': 0.008726209924932112, 'critic_loss': 61.662388996771206, 'actor_loss': 2.3156963454352484, 'time_step': 0.009133651242618673, 'td_error': 11.847361573973298, 'init_value': -73.64913177490234, 'ave_value': -41.03844896268049} step=15390
2022-04-20 17:36.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.46 [info     ] TD3PlusBC_20220420173428: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00030468709287587663, 'time_algorithm_update': 0.00800964497683341, 'critic_loss': 64.13007628011425, 'actor_loss': 2.3166519312830696, 'time_step': 0.008380064490245797, 'td_error': 12.046600365367212, 'init_value': -74.78826904296875, 'ave_value': -41.66341480118172} step=15732
2022-04-20 17:36.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.49 [info     ] TD3PlusBC_20220420173428: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.000332029242264597, 'time_algorithm_update': 0.008659338393406562, 'critic_loss': 67.00114186604817, 'actor_loss': 2.3147618324435943, 'time_step': 0.009063548511928983, 'td_error': 12.156537018283608, 'init_value': -75.15385437011719, 'ave_value': -41.91330113462625} step=16074
2022-04-20 17:36.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.53 [info     ] TD3PlusBC_20220420173428: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003449442790962799, 'time_algorithm_update': 0.008794230327271578, 'critic_loss': 69.50549817224692, 'actor_loss': 2.3203506595210026, 'time_step': 0.009214306435389825, 'td_error': 12.357790236472503, 'init_value': -76.2426986694336, 'ave_value': -42.41843069013921} step=16416
2022-04-20 17:36.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.56 [info     ] TD3PlusBC_20220420173428: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003459551181012427, 'time_algorithm_update': 0.008897867816233496, 'critic_loss': 72.12535456607216, 'actor_loss': 2.3191795028441136, 'time_step': 0.009321578761987519, 'td_error': 12.551621614936755, 'init_value': -77.13334655761719, 'ave_value': -42.977937925561996} step=16758
2022-04-20 17:36.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.00 [info     ] TD3PlusBC_20220420173428: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003479663391559445, 'time_algorithm_update': 0.008838619405066061, 'critic_loss': 74.71613693237305, 'actor_loss': 2.3174895933497024, 'time_step': 0.009263265202617088, 'td_error': 12.665515085372833, 'init_value': -77.93502044677734, 'ave_value': -43.48289508885922} step=17100
2022-04-20 17:37.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173428/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:37.01 [info     ] FQE_20220420173700: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016045426747885095, 'time_algorithm_update': 0.004957094249955143, 'loss': 0.009173951714849436, 'time_step': 0.005192437803888896, 'init_value': -0.5819200277328491, 'ave_value': -0.4852809524482435, 'soft_opc': nan} step=166




2022-04-20 17:37.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.02 [info     ] FQE_20220420173700: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015775841402720255, 'time_algorithm_update': 0.0040646374943744705, 'loss': 0.006677389802714159, 'time_step': 0.00429175847984222, 'init_value': -0.7375706434249878, 'ave_value': -0.5684531032233625, 'soft_opc': nan} step=332




2022-04-20 17:37.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.03 [info     ] FQE_20220420173700: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016143953943827065, 'time_algorithm_update': 0.005092195717685194, 'loss': 0.006084251636919756, 'time_step': 0.005324092256017478, 'init_value': -0.778762936592102, 'ave_value': -0.571389340636161, 'soft_opc': nan} step=498




2022-04-20 17:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.04 [info     ] FQE_20220420173700: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016129016876220703, 'time_algorithm_update': 0.0049470792333763764, 'loss': 0.0062010422691209125, 'time_step': 0.005182773233896278, 'init_value': -0.8191455006599426, 'ave_value': -0.5660623496985651, 'soft_opc': nan} step=664




2022-04-20 17:37.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.05 [info     ] FQE_20220420173700: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016294617250741246, 'time_algorithm_update': 0.005139520369380353, 'loss': 0.005898866293312286, 'time_step': 0.005373643105288586, 'init_value': -0.8422256708145142, 'ave_value': -0.5507233989903251, 'soft_opc': nan} step=830




2022-04-20 17:37.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.06 [info     ] FQE_20220420173700: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016324060509003788, 'time_algorithm_update': 0.005076867988310665, 'loss': 0.005651257570316813, 'time_step': 0.005311816571706749, 'init_value': -0.8661980628967285, 'ave_value': -0.5588937840416087, 'soft_opc': nan} step=996




2022-04-20 17:37.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.07 [info     ] FQE_20220420173700: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016240183129368057, 'time_algorithm_update': 0.005015236785612911, 'loss': 0.005423173231229815, 'time_step': 0.0052512180374329346, 'init_value': -0.8999456167221069, 'ave_value': -0.5742332151075741, 'soft_opc': nan} step=1162




2022-04-20 17:37.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.08 [info     ] FQE_20220420173700: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001645777598921075, 'time_algorithm_update': 0.005101143595684005, 'loss': 0.005174638281284596, 'time_step': 0.00533844045845859, 'init_value': -0.9944626688957214, 'ave_value': -0.6293789628725331, 'soft_opc': nan} step=1328




2022-04-20 17:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.09 [info     ] FQE_20220420173700: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016386394041130342, 'time_algorithm_update': 0.005032311002892184, 'loss': 0.005036658342332995, 'time_step': 0.005271091518631901, 'init_value': -0.9757657051086426, 'ave_value': -0.6092519268997618, 'soft_opc': nan} step=1494




2022-04-20 17:37.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.10 [info     ] FQE_20220420173700: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016643914831690042, 'time_algorithm_update': 0.004983680794037968, 'loss': 0.005034076624987535, 'time_step': 0.005226478519209896, 'init_value': -0.972917914390564, 'ave_value': -0.5871577749716806, 'soft_opc': nan} step=1660




2022-04-20 17:37.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.10 [info     ] FQE_20220420173700: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015623598213655403, 'time_algorithm_update': 0.004410075854106122, 'loss': 0.005029969959497362, 'time_step': 0.004636052143142884, 'init_value': -1.0179381370544434, 'ave_value': -0.6236642210870176, 'soft_opc': nan} step=1826




2022-04-20 17:37.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.11 [info     ] FQE_20220420173700: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014661737235195665, 'time_algorithm_update': 0.004869507019778332, 'loss': 0.0050571903653712155, 'time_step': 0.005078518247029868, 'init_value': -1.0157511234283447, 'ave_value': -0.6088583171434767, 'soft_opc': nan} step=1992




2022-04-20 17:37.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.12 [info     ] FQE_20220420173700: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014355814600565346, 'time_algorithm_update': 0.005034264311733016, 'loss': 0.005344873060702335, 'time_step': 0.005244226340787956, 'init_value': -1.0818995237350464, 'ave_value': -0.6532576629371794, 'soft_opc': nan} step=2158




2022-04-20 17:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.13 [info     ] FQE_20220420173700: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001438899212572948, 'time_algorithm_update': 0.0050020749310413035, 'loss': 0.005339279985448054, 'time_step': 0.005208690482449819, 'init_value': -1.1065869331359863, 'ave_value': -0.6580451366265078, 'soft_opc': nan} step=2324




2022-04-20 17:37.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.14 [info     ] FQE_20220420173700: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001420917281185288, 'time_algorithm_update': 0.004878252385610558, 'loss': 0.0055811040360110266, 'time_step': 0.005083675844123565, 'init_value': -1.1307801008224487, 'ave_value': -0.6664136053608345, 'soft_opc': nan} step=2490




2022-04-20 17:37.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.15 [info     ] FQE_20220420173700: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015348124216838055, 'time_algorithm_update': 0.004901120461613299, 'loss': 0.006221305613310626, 'time_step': 0.005119547786482845, 'init_value': -1.2005691528320312, 'ave_value': -0.7222847006208187, 'soft_opc': nan} step=2656




2022-04-20 17:37.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.16 [info     ] FQE_20220420173700: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015669127544724797, 'time_algorithm_update': 0.005030958049268608, 'loss': 0.00658528868975791, 'time_step': 0.005255871508494917, 'init_value': -1.2339725494384766, 'ave_value': -0.7247900218174265, 'soft_opc': nan} step=2822




2022-04-20 17:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.17 [info     ] FQE_20220420173700: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016514364495334854, 'time_algorithm_update': 0.005052981606448989, 'loss': 0.007012554192606988, 'time_step': 0.005294210939522249, 'init_value': -1.2868742942810059, 'ave_value': -0.7660842929203231, 'soft_opc': nan} step=2988




2022-04-20 17:37.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.18 [info     ] FQE_20220420173700: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001649669854037733, 'time_algorithm_update': 0.005045985601034509, 'loss': 0.007536779410284327, 'time_step': 0.005284473120448101, 'init_value': -1.3673733472824097, 'ave_value': -0.8289113436934648, 'soft_opc': nan} step=3154




2022-04-20 17:37.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.19 [info     ] FQE_20220420173700: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016224815184811512, 'time_algorithm_update': 0.00410705445760704, 'loss': 0.008207908099382577, 'time_step': 0.0043415563652314335, 'init_value': -1.400235652923584, 'ave_value': -0.8353616619902151, 'soft_opc': nan} step=3320




2022-04-20 17:37.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.20 [info     ] FQE_20220420173700: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016441689916403898, 'time_algorithm_update': 0.005105245544249753, 'loss': 0.008399489243845848, 'time_step': 0.005344063402658485, 'init_value': -1.4604476690292358, 'ave_value': -0.8660200960934162, 'soft_opc': nan} step=3486




2022-04-20 17:37.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.21 [info     ] FQE_20220420173700: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016422874956245883, 'time_algorithm_update': 0.004996884300048093, 'loss': 0.009201919852828899, 'time_step': 0.00523343574569886, 'init_value': -1.489271879196167, 'ave_value': -0.9118363112613962, 'soft_opc': nan} step=3652




2022-04-20 17:37.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.22 [info     ] FQE_20220420173700: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001629519175334149, 'time_algorithm_update': 0.005037142569760242, 'loss': 0.009982079454046866, 'time_step': 0.005275443375828755, 'init_value': -1.6064410209655762, 'ave_value': -0.9799582084556958, 'soft_opc': nan} step=3818




2022-04-20 17:37.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.23 [info     ] FQE_20220420173700: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016373036855674652, 'time_algorithm_update': 0.005094668951379247, 'loss': 0.010564731339312506, 'time_step': 0.005332681069891137, 'init_value': -1.5595531463623047, 'ave_value': -0.9294676241573987, 'soft_opc': nan} step=3984




2022-04-20 17:37.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.24 [info     ] FQE_20220420173700: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016668618443500563, 'time_algorithm_update': 0.0050432308610663355, 'loss': 0.01132479368594015, 'time_step': 0.005282054464501071, 'init_value': -1.6734881401062012, 'ave_value': -0.990357346483716, 'soft_opc': nan} step=4150




2022-04-20 17:37.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.25 [info     ] FQE_20220420173700: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016538349978895072, 'time_algorithm_update': 0.0050927659115159365, 'loss': 0.012257598402225468, 'time_step': 0.005333568676408515, 'init_value': -1.7371118068695068, 'ave_value': -1.004304326379353, 'soft_opc': nan} step=4316




2022-04-20 17:37.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.26 [info     ] FQE_20220420173700: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001620398946555264, 'time_algorithm_update': 0.005114470619753182, 'loss': 0.012918305130260268, 'time_step': 0.005345865904566753, 'init_value': -1.7816786766052246, 'ave_value': -1.0215132831721692, 'soft_opc': nan} step=4482




2022-04-20 17:37.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.27 [info     ] FQE_20220420173700: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016136772661324008, 'time_algorithm_update': 0.0051023098359625025, 'loss': 0.012134840822054904, 'time_step': 0.005336093615336591, 'init_value': -1.9360053539276123, 'ave_value': -1.1349345798964974, 'soft_opc': nan} step=4648




2022-04-20 17:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.27 [info     ] FQE_20220420173700: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001592320131968303, 'time_algorithm_update': 0.0043321259050484165, 'loss': 0.014844273515073695, 'time_step': 0.004564167505287263, 'init_value': -2.0317625999450684, 'ave_value': -1.1674180241519803, 'soft_opc': nan} step=4814




2022-04-20 17:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.28 [info     ] FQE_20220420173700: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016388548425881258, 'time_algorithm_update': 0.004974707063422145, 'loss': 0.015122086200330138, 'time_step': 0.0052110387618283195, 'init_value': -1.9882619380950928, 'ave_value': -1.1086491949990527, 'soft_opc': nan} step=4980




2022-04-20 17:37.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.29 [info     ] FQE_20220420173700: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016280541937035252, 'time_algorithm_update': 0.005037929638322577, 'loss': 0.016118502536092896, 'time_step': 0.005273576242377959, 'init_value': -2.0647811889648438, 'ave_value': -1.1419508088212293, 'soft_opc': nan} step=5146




2022-04-20 17:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.30 [info     ] FQE_20220420173700: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016627685133233127, 'time_algorithm_update': 0.00505096510232213, 'loss': 0.017390576104285668, 'time_step': 0.005290801266589797, 'init_value': -2.178161382675171, 'ave_value': -1.202837288285698, 'soft_opc': nan} step=5312




2022-04-20 17:37.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.31 [info     ] FQE_20220420173700: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016162194401384835, 'time_algorithm_update': 0.005002645124872047, 'loss': 0.01860386220014553, 'time_step': 0.005234662308750382, 'init_value': -2.253267765045166, 'ave_value': -1.2519078987675745, 'soft_opc': nan} step=5478




2022-04-20 17:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.32 [info     ] FQE_20220420173700: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016293180994240633, 'time_algorithm_update': 0.005053191299898079, 'loss': 0.019735974694478763, 'time_step': 0.005289205585617617, 'init_value': -2.335632085800171, 'ave_value': -1.2773325220891485, 'soft_opc': nan} step=5644




2022-04-20 17:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.33 [info     ] FQE_20220420173700: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016486644744873047, 'time_algorithm_update': 0.005048774811158697, 'loss': 0.019511017609662545, 'time_step': 0.005289583321077278, 'init_value': -2.4180877208709717, 'ave_value': -1.2826231849086178, 'soft_opc': nan} step=5810




2022-04-20 17:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.34 [info     ] FQE_20220420173700: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001649583678647696, 'time_algorithm_update': 0.0050171800406582385, 'loss': 0.02007757415027212, 'time_step': 0.005256328238062112, 'init_value': -2.4590845108032227, 'ave_value': -1.3280451112099596, 'soft_opc': nan} step=5976




2022-04-20 17:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.35 [info     ] FQE_20220420173700: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016664022422698606, 'time_algorithm_update': 0.005057172602917774, 'loss': 0.021036787918651004, 'time_step': 0.0052979911666318595, 'init_value': -2.6264238357543945, 'ave_value': -1.4150543003871634, 'soft_opc': nan} step=6142




2022-04-20 17:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.36 [info     ] FQE_20220420173700: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016364419316670982, 'time_algorithm_update': 0.0047974414136036335, 'loss': 0.022512547356211186, 'time_step': 0.005035647426743105, 'init_value': -2.6606333255767822, 'ave_value': -1.4228242908579272, 'soft_opc': nan} step=6308




2022-04-20 17:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.37 [info     ] FQE_20220420173700: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001611551606511495, 'time_algorithm_update': 0.0045253138944327115, 'loss': 0.02329747654786558, 'time_step': 0.00476090735699757, 'init_value': -2.7791907787323, 'ave_value': -1.5026759036153823, 'soft_opc': nan} step=6474




2022-04-20 17:37.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.38 [info     ] FQE_20220420173700: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016433790505650532, 'time_algorithm_update': 0.005131613777344485, 'loss': 0.023992965482483922, 'time_step': 0.005369314228195742, 'init_value': -2.85750412940979, 'ave_value': -1.494473453282236, 'soft_opc': nan} step=6640




2022-04-20 17:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.39 [info     ] FQE_20220420173700: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016598672751920768, 'time_algorithm_update': 0.005101022950137954, 'loss': 0.025821440596503754, 'time_step': 0.00533933237374547, 'init_value': -3.0374245643615723, 'ave_value': -1.6427662864730166, 'soft_opc': nan} step=6806




2022-04-20 17:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.40 [info     ] FQE_20220420173700: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016476878200668887, 'time_algorithm_update': 0.004984262477920716, 'loss': 0.027036198107694018, 'time_step': 0.005221203149083149, 'init_value': -3.044233560562134, 'ave_value': -1.6104966183503469, 'soft_opc': nan} step=6972




2022-04-20 17:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.41 [info     ] FQE_20220420173700: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00017067035996770285, 'time_algorithm_update': 0.004961476268538509, 'loss': 0.02798230550742809, 'time_step': 0.005207680794129889, 'init_value': -3.10030198097229, 'ave_value': -1.6421358273238749, 'soft_opc': nan} step=7138




2022-04-20 17:37.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.42 [info     ] FQE_20220420173700: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016126862491469784, 'time_algorithm_update': 0.004942145692296775, 'loss': 0.02980903821228723, 'time_step': 0.0051799854600285905, 'init_value': -3.345975399017334, 'ave_value': -1.819015933398728, 'soft_opc': nan} step=7304




2022-04-20 17:37.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.43 [info     ] FQE_20220420173700: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016188477895346033, 'time_algorithm_update': 0.005007320139781538, 'loss': 0.03061071621514969, 'time_step': 0.005243867276662804, 'init_value': -3.3240904808044434, 'ave_value': -1.7609203716253374, 'soft_opc': nan} step=7470




2022-04-20 17:37.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.44 [info     ] FQE_20220420173700: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001645073833235775, 'time_algorithm_update': 0.005003516932567918, 'loss': 0.031734633743370246, 'time_step': 0.005241743053298399, 'init_value': -3.5472006797790527, 'ave_value': -1.86653313694505, 'soft_opc': nan} step=7636




2022-04-20 17:37.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.45 [info     ] FQE_20220420173700: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016239752252417873, 'time_algorithm_update': 0.004996977656720632, 'loss': 0.032757233003580785, 'time_step': 0.00523254383041198, 'init_value': -3.5349769592285156, 'ave_value': -1.8625835712092953, 'soft_opc': nan} step=7802




2022-04-20 17:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.45 [info     ] FQE_20220420173700: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016164492411785815, 'time_algorithm_update': 0.004245610122221062, 'loss': 0.03425475044866059, 'time_step': 0.004478012222841561, 'init_value': -3.612490653991699, 'ave_value': -1.8778584279254213, 'soft_opc': nan} step=7968




2022-04-20 17:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.46 [info     ] FQE_20220420173700: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016508619469332407, 'time_algorithm_update': 0.005081183939095003, 'loss': 0.03528274272018707, 'time_step': 0.005320368042911391, 'init_value': -3.656468391418457, 'ave_value': -1.8535654199485843, 'soft_opc': nan} step=8134




2022-04-20 17:37.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:37.47 [info     ] FQE_20220420173700: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016033649444580078, 'time_algorithm_update': 0.005029725741191083, 'loss': 0.03584437219900113, 'time_step': 0.005266236971659833, 'init_value': -3.766392707824707, 'ave_value': -1.904184763281195, 'soft_opc': nan} step=8300




2022-04-20 17:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173700/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:37.48 [info     ] Directory is created at d3rlpy_logs/FQE_20220420173748
2022-04-20 17:37.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:37.48 [debug    ] Building models...
2022-04-20 17:37.48 [debug    ] Models have been built.
2022-04-20 17:37.48 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420173748/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:37.50 [info     ] FQE_20220420173748: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016326128050338392, 'time_algorithm_update': 0.004986099032468574, 'loss': 0.033488071263702804, 'time_step': 0.005223837702773338, 'init_value': -0.9243327379226685, 'ave_value': -0.934825759958308, 'soft_opc': nan} step=344




2022-04-20 17:37.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:37.52 [info     ] FQE_20220420173748: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001681523267612901, 'time_algorithm_update': 0.005047653996667197, 'loss': 0.02658749978209651, 'time_step': 0.00529098580049914, 'init_value': -1.4700833559036255, 'ave_value': -1.5109143937258303, 'soft_opc': nan} step=688




2022-04-20 17:37.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:37.53 [info     ] FQE_20220420173748: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016751400260038154, 'time_algorithm_update': 0.004817538483198299, 'loss': 0.028402673113051544, 'time_step': 0.0050571470759635745, 'init_value': -2.0930089950561523, 'ave_value': -2.262273765701029, 'soft_opc': nan} step=1032




2022-04-20 17:37.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:37.55 [info     ] FQE_20220420173748: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001692182795945988, 'time_algorithm_update': 0.004921866710795913, 'loss': 0.031280485138860205, 'time_step': 0.005163564238437387, 'init_value': -2.4413342475891113, 'ave_value': -2.742018932097818, 'soft_opc': nan} step=1376




2022-04-20 17:37.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:37.57 [info     ] FQE_20220420173748: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016759994418122048, 'time_algorithm_update': 0.005101917094962541, 'loss': 0.03824150988691335, 'time_step': 0.005342930555343628, 'init_value': -2.851834297180176, 'ave_value': -3.3315968167822314, 'soft_opc': nan} step=1720




2022-04-20 17:37.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:37.59 [info     ] FQE_20220420173748: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017051433407983116, 'time_algorithm_update': 0.005018103954403899, 'loss': 0.046426933302089225, 'time_step': 0.0052648755007011945, 'init_value': -3.346395492553711, 'ave_value': -3.950871140246808, 'soft_opc': nan} step=2064




2022-04-20 17:37.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.01 [info     ] FQE_20220420173748: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016937976659730424, 'time_algorithm_update': 0.004980203024176664, 'loss': 0.05793256107934339, 'time_step': 0.005224532166192698, 'init_value': -3.8803300857543945, 'ave_value': -4.571729163236304, 'soft_opc': nan} step=2408




2022-04-20 17:38.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.03 [info     ] FQE_20220420173748: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001651637775953426, 'time_algorithm_update': 0.004584774721500485, 'loss': 0.07195413086108517, 'time_step': 0.004824179549549901, 'init_value': -4.277592658996582, 'ave_value': -4.960171872972058, 'soft_opc': nan} step=2752




2022-04-20 17:38.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.05 [info     ] FQE_20220420173748: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017069245493689248, 'time_algorithm_update': 0.005076761162558267, 'loss': 0.08588966031472177, 'time_step': 0.005321854075720144, 'init_value': -4.670023441314697, 'ave_value': -5.370799712796889, 'soft_opc': nan} step=3096




2022-04-20 17:38.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.07 [info     ] FQE_20220420173748: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017088998195736906, 'time_algorithm_update': 0.005032389662986578, 'loss': 0.10410910683987272, 'time_step': 0.005278436943542126, 'init_value': -5.1631927490234375, 'ave_value': -5.798696697441398, 'soft_opc': nan} step=3440




2022-04-20 17:38.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.09 [info     ] FQE_20220420173748: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016920095266297807, 'time_algorithm_update': 0.005034056513808494, 'loss': 0.11987512104408166, 'time_step': 0.005278790412947189, 'init_value': -5.638300895690918, 'ave_value': -6.237363257330133, 'soft_opc': nan} step=3784




2022-04-20 17:38.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.11 [info     ] FQE_20220420173748: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001688724340394486, 'time_algorithm_update': 0.004871987326200618, 'loss': 0.13819043749271956, 'time_step': 0.005115921414175699, 'init_value': -6.203858852386475, 'ave_value': -6.7879211994066315, 'soft_opc': nan} step=4128




2022-04-20 17:38.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.13 [info     ] FQE_20220420173748: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016801579054011854, 'time_algorithm_update': 0.004913775726806286, 'loss': 0.1498711203647301, 'time_step': 0.005154245814611745, 'init_value': -6.649224758148193, 'ave_value': -7.236554613969025, 'soft_opc': nan} step=4472




2022-04-20 17:38.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.15 [info     ] FQE_20220420173748: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016853351925694667, 'time_algorithm_update': 0.005049890557000804, 'loss': 0.1624676339425753, 'time_step': 0.005294056825859602, 'init_value': -7.008471965789795, 'ave_value': -7.535340752201742, 'soft_opc': nan} step=4816




2022-04-20 17:38.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.17 [info     ] FQE_20220420173748: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016950452050497366, 'time_algorithm_update': 0.0049590620883675504, 'loss': 0.1707356160961438, 'time_step': 0.005202704390814138, 'init_value': -7.294218063354492, 'ave_value': -7.870433141727017, 'soft_opc': nan} step=5160




2022-04-20 17:38.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.19 [info     ] FQE_20220420173748: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017136543296104254, 'time_algorithm_update': 0.005020102789235669, 'loss': 0.18080729438360174, 'time_step': 0.005268249400826388, 'init_value': -7.788181781768799, 'ave_value': -8.40328516316273, 'soft_opc': nan} step=5504




2022-04-20 17:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.20 [info     ] FQE_20220420173748: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016909976338231288, 'time_algorithm_update': 0.004627020553100941, 'loss': 0.18926681714067453, 'time_step': 0.004871021176493445, 'init_value': -8.045120239257812, 'ave_value': -8.76965859239535, 'soft_opc': nan} step=5848




2022-04-20 17:38.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.22 [info     ] FQE_20220420173748: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017158929691758266, 'time_algorithm_update': 0.005065032909082812, 'loss': 0.19208986187072166, 'time_step': 0.005309639282004778, 'init_value': -8.136144638061523, 'ave_value': -8.879429936321317, 'soft_opc': nan} step=6192




2022-04-20 17:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.24 [info     ] FQE_20220420173748: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016604675803073618, 'time_algorithm_update': 0.0050221764764120415, 'loss': 0.19602989577579982, 'time_step': 0.005261694969132889, 'init_value': -8.501091003417969, 'ave_value': -9.293795399697547, 'soft_opc': nan} step=6536




2022-04-20 17:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.26 [info     ] FQE_20220420173748: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001713072144707968, 'time_algorithm_update': 0.004975997431333675, 'loss': 0.1919577404271898, 'time_step': 0.0052242043406464335, 'init_value': -8.57776927947998, 'ave_value': -9.443916298781504, 'soft_opc': nan} step=6880




2022-04-20 17:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.28 [info     ] FQE_20220420173748: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016797351282696392, 'time_algorithm_update': 0.004893535098364187, 'loss': 0.19290447388342474, 'time_step': 0.005137632752573768, 'init_value': -8.732362747192383, 'ave_value': -9.659415084795254, 'soft_opc': nan} step=7224




2022-04-20 17:38.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.30 [info     ] FQE_20220420173748: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017137097757916118, 'time_algorithm_update': 0.004818771467652432, 'loss': 0.19063083186940571, 'time_step': 0.0050641617109609205, 'init_value': -8.817413330078125, 'ave_value': -9.819399636792259, 'soft_opc': nan} step=7568




2022-04-20 17:38.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.32 [info     ] FQE_20220420173748: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001682285652604214, 'time_algorithm_update': 0.005071550607681274, 'loss': 0.19168100459501147, 'time_step': 0.005314817955327588, 'init_value': -8.978782653808594, 'ave_value': -10.02127428307929, 'soft_opc': nan} step=7912




2022-04-20 17:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.34 [info     ] FQE_20220420173748: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017254990200663722, 'time_algorithm_update': 0.0050672673901846245, 'loss': 0.18892819608756622, 'time_step': 0.00531960781230483, 'init_value': -9.127429962158203, 'ave_value': -10.161066769124721, 'soft_opc': nan} step=8256




2022-04-20 17:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.36 [info     ] FQE_20220420173748: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016839282457218615, 'time_algorithm_update': 0.005073731721833695, 'loss': 0.18788560645026697, 'time_step': 0.005316597777743673, 'init_value': -9.36751937866211, 'ave_value': -10.507635018343592, 'soft_opc': nan} step=8600




2022-04-20 17:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.38 [info     ] FQE_20220420173748: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016949551050053088, 'time_algorithm_update': 0.004550805618596631, 'loss': 0.19140387366547487, 'time_step': 0.004794806241989136, 'init_value': -9.475601196289062, 'ave_value': -10.70524092852075, 'soft_opc': nan} step=8944




2022-04-20 17:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.40 [info     ] FQE_20220420173748: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017210148101629212, 'time_algorithm_update': 0.00511096244634584, 'loss': 0.19299223504657315, 'time_step': 0.005358539348424867, 'init_value': -9.575045585632324, 'ave_value': -10.878004583664477, 'soft_opc': nan} step=9288




2022-04-20 17:38.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.42 [info     ] FQE_20220420173748: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001719490040180295, 'time_algorithm_update': 0.005021177058996156, 'loss': 0.19690728714408048, 'time_step': 0.005268766436465951, 'init_value': -9.704574584960938, 'ave_value': -11.10127375727144, 'soft_opc': nan} step=9632




2022-04-20 17:38.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.44 [info     ] FQE_20220420173748: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016943105431490167, 'time_algorithm_update': 0.005023168269978013, 'loss': 0.20275398446680154, 'time_step': 0.005267514738925668, 'init_value': -9.896857261657715, 'ave_value': -11.337439126474285, 'soft_opc': nan} step=9976




2022-04-20 17:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.46 [info     ] FQE_20220420173748: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017368724179822346, 'time_algorithm_update': 0.0050534883210825365, 'loss': 0.21243121879998333, 'time_step': 0.0053018594897070595, 'init_value': -10.047203063964844, 'ave_value': -11.541165846348678, 'soft_opc': nan} step=10320




2022-04-20 17:38.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.48 [info     ] FQE_20220420173748: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001693146173344102, 'time_algorithm_update': 0.004900929539702659, 'loss': 0.2225044939428741, 'time_step': 0.0051418411177258155, 'init_value': -10.072522163391113, 'ave_value': -11.690290847805649, 'soft_opc': nan} step=10664




2022-04-20 17:38.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.50 [info     ] FQE_20220420173748: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017696757649266444, 'time_algorithm_update': 0.005197120960368667, 'loss': 0.23352818519180252, 'time_step': 0.0054492292016051535, 'init_value': -10.288774490356445, 'ave_value': -11.85451783001725, 'soft_opc': nan} step=11008




2022-04-20 17:38.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.52 [info     ] FQE_20220420173748: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017035977784977404, 'time_algorithm_update': 0.005077712757642879, 'loss': 0.2500657616941215, 'time_step': 0.005322274773619896, 'init_value': -10.304614067077637, 'ave_value': -11.866273058449476, 'soft_opc': nan} step=11352




2022-04-20 17:38.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.54 [info     ] FQE_20220420173748: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001730170360831327, 'time_algorithm_update': 0.005042446907176528, 'loss': 0.26130719812110415, 'time_step': 0.005293319391649823, 'init_value': -10.682323455810547, 'ave_value': -12.138085288836553, 'soft_opc': nan} step=11696




2022-04-20 17:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.55 [info     ] FQE_20220420173748: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016948303510976392, 'time_algorithm_update': 0.004612467316694038, 'loss': 0.2711709602465203, 'time_step': 0.004854416431382645, 'init_value': -11.034570693969727, 'ave_value': -12.410212829583225, 'soft_opc': nan} step=12040




2022-04-20 17:38.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.57 [info     ] FQE_20220420173748: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017646856086198673, 'time_algorithm_update': 0.005152153414349223, 'loss': 0.2914784750034816, 'time_step': 0.005404987307481988, 'init_value': -11.069914817810059, 'ave_value': -12.413144924029025, 'soft_opc': nan} step=12384




2022-04-20 17:38.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:38.59 [info     ] FQE_20220420173748: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017125523367593455, 'time_algorithm_update': 0.005204042723012525, 'loss': 0.3089872866186725, 'time_step': 0.00544785413631173, 'init_value': -11.561081886291504, 'ave_value': -12.741886559196617, 'soft_opc': nan} step=12728




2022-04-20 17:38.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.01 [info     ] FQE_20220420173748: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017506507940070573, 'time_algorithm_update': 0.00509272342504457, 'loss': 0.3223054727517761, 'time_step': 0.00534504305484683, 'init_value': -11.770174026489258, 'ave_value': -13.012360177258278, 'soft_opc': nan} step=13072




2022-04-20 17:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.03 [info     ] FQE_20220420173748: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017152830611827762, 'time_algorithm_update': 0.00478719902593036, 'loss': 0.33319888317546004, 'time_step': 0.005035105832787447, 'init_value': -12.060409545898438, 'ave_value': -13.187275503465443, 'soft_opc': nan} step=13416




2022-04-20 17:39.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.05 [info     ] FQE_20220420173748: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001745882422425026, 'time_algorithm_update': 0.005096531191537547, 'loss': 0.35269931773655117, 'time_step': 0.005348223586415135, 'init_value': -12.35116958618164, 'ave_value': -13.46284888014158, 'soft_opc': nan} step=13760




2022-04-20 17:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.07 [info     ] FQE_20220420173748: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001683165860730548, 'time_algorithm_update': 0.005057816588601401, 'loss': 0.3647205954969882, 'time_step': 0.005300766506860423, 'init_value': -12.448005676269531, 'ave_value': -13.336897513414586, 'soft_opc': nan} step=14104




2022-04-20 17:39.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.09 [info     ] FQE_20220420173748: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017209801562996797, 'time_algorithm_update': 0.00503164668415868, 'loss': 0.37917703665727964, 'time_step': 0.005281005487885586, 'init_value': -12.300357818603516, 'ave_value': -13.002569664396693, 'soft_opc': nan} step=14448




2022-04-20 17:39.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.11 [info     ] FQE_20220420173748: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017277445904044218, 'time_algorithm_update': 0.005033842352933662, 'loss': 0.389516671215249, 'time_step': 0.005282419365505839, 'init_value': -12.388053894042969, 'ave_value': -13.204776215243996, 'soft_opc': nan} step=14792




2022-04-20 17:39.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.13 [info     ] FQE_20220420173748: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001700395761534225, 'time_algorithm_update': 0.004518720992775851, 'loss': 0.39140775175981746, 'time_step': 0.004763756380524746, 'init_value': -12.381670951843262, 'ave_value': -13.090381328954345, 'soft_opc': nan} step=15136




2022-04-20 17:39.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.15 [info     ] FQE_20220420173748: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017264901205550794, 'time_algorithm_update': 0.005000914945158847, 'loss': 0.3989558428861634, 'time_step': 0.005249618097793224, 'init_value': -12.485008239746094, 'ave_value': -13.245161202946585, 'soft_opc': nan} step=15480




2022-04-20 17:39.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.17 [info     ] FQE_20220420173748: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017336981241093127, 'time_algorithm_update': 0.005115728045618812, 'loss': 0.40890905272952, 'time_step': 0.005364593378333158, 'init_value': -12.973285675048828, 'ave_value': -13.566409470639474, 'soft_opc': nan} step=15824




2022-04-20 17:39.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.19 [info     ] FQE_20220420173748: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017376486645188441, 'time_algorithm_update': 0.005040984514147737, 'loss': 0.4161104294092416, 'time_step': 0.005294234253639399, 'init_value': -12.980130195617676, 'ave_value': -13.551463278338549, 'soft_opc': nan} step=16168




2022-04-20 17:39.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.21 [info     ] FQE_20220420173748: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017494864242021427, 'time_algorithm_update': 0.004739581152450207, 'loss': 0.4339338806566111, 'time_step': 0.004991590976715088, 'init_value': -13.532188415527344, 'ave_value': -14.369904076687254, 'soft_opc': nan} step=16512




2022-04-20 17:39.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.23 [info     ] FQE_20220420173748: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001700991807981979, 'time_algorithm_update': 0.0050603380036908525, 'loss': 0.43804855880782356, 'time_step': 0.0053051924982736275, 'init_value': -13.168037414550781, 'ave_value': -13.904494815995614, 'soft_opc': nan} step=16856




2022-04-20 17:39.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:39.25 [info     ] FQE_20220420173748: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016997927843138229, 'time_algorithm_update': 0.005051647507867148, 'loss': 0.4389607692245654, 'time_step': 0.005298159150190131, 'init_value': -13.430496215820312, 'ave_value': -14.112583491532552, 'soft_opc': nan} step=17200




2022-04-20 17:39.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173748/model_17200.pt
search iteration:  26
using hyper params:  [0.007376723762668144, 0.0035476405697418977, 5.8846479476818484e-05, 5]
2022-04-20 17:39.25 [debug    ] RoundIterator is selected.
2022-04-20 17:39.25 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420173925
2022-04-20 17:39.25 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:39.25 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:39.25 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:39.25 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.007376723762

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.29 [info     ] TD3PlusBC_20220420173925: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003721811618024146, 'time_algorithm_update': 0.008927031567222193, 'critic_loss': 7.447421031737188, 'actor_loss': 2.6419720105957567, 'time_step': 0.0093789163388704, 'td_error': 0.9158917858556617, 'init_value': -8.010416030883789, 'ave_value': -4.978398785665883} step=342
2022-04-20 17:39.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.32 [info     ] TD3PlusBC_20220420173925: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036995383033975525, 'time_algorithm_update': 0.008368325512311613, 'critic_loss': 3.061441169147603, 'actor_loss': 2.5423072430125453, 'time_step': 0.00881403370907432, 'td_error': 1.0472112665494882, 'init_value': -11.296579360961914, 'ave_value': -6.980338491240541} step=684
2022-04-20 17:39.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.36 [info     ] TD3PlusBC_20220420173925: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00037802171986005457, 'time_algorithm_update': 0.008943856111046864, 'critic_loss': 4.831136413833551, 'actor_loss': 2.5349509353526156, 'time_step': 0.0093999717667786, 'td_error': 1.2648282781503954, 'init_value': -15.009068489074707, 'ave_value': -9.347056244206573} step=1026
2022-04-20 17:39.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.40 [info     ] TD3PlusBC_20220420173925: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00037525271811680486, 'time_algorithm_update': 0.008385549511825829, 'critic_loss': 6.756526768556115, 'actor_loss': 2.528741416875382, 'time_step': 0.00884096176303618, 'td_error': 1.5436821573408839, 'init_value': -18.747638702392578, 'ave_value': -11.678118434775838} step=1368
2022-04-20 17:39.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.43 [info     ] TD3PlusBC_20220420173925: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003754311834859569, 'time_algorithm_update': 0.008981033375388697, 'critic_loss': 9.136551451961896, 'actor_loss': 2.5282517148737322, 'time_step': 0.009434852683753298, 'td_error': 1.902872713894145, 'init_value': -22.548446655273438, 'ave_value': -14.030002833960435} step=1710
2022-04-20 17:39.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.47 [info     ] TD3PlusBC_20220420173925: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003719573829606263, 'time_algorithm_update': 0.008828706908644292, 'critic_loss': 11.881800247214692, 'actor_loss': 2.526078293895164, 'time_step': 0.009269991813347353, 'td_error': 2.2879027203770788, 'init_value': -25.931591033935547, 'ave_value': -16.223825614477533} step=2052
2022-04-20 17:39.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.51 [info     ] TD3PlusBC_20220420173925: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003702257111755728, 'time_algorithm_update': 0.008520009224874931, 'critic_loss': 14.883659610971373, 'actor_loss': 2.5252937322471576, 'time_step': 0.008963818438569008, 'td_error': 2.702410564169032, 'init_value': -29.72422218322754, 'ave_value': -18.61754653521352} step=2394
2022-04-20 17:39.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.54 [info     ] TD3PlusBC_20220420173925: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037125885835167957, 'time_algorithm_update': 0.00876645316854555, 'critic_loss': 18.069846864332234, 'actor_loss': 2.5236905834131074, 'time_step': 0.009210474309865494, 'td_error': 3.054677321088864, 'init_value': -32.610652923583984, 'ave_value': -20.662350256544965} step=2736
2022-04-20 17:39.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.58 [info     ] TD3PlusBC_20220420173925: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003720745008591323, 'time_algorithm_update': 0.008611525708471823, 'critic_loss': 21.79723538850483, 'actor_loss': 2.523480865690443, 'time_step': 0.009056487975761904, 'td_error': 3.407387775062879, 'init_value': -35.48637771606445, 'ave_value': -22.605168116863666} step=3078
2022-04-20 17:39.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.01 [info     ] TD3PlusBC_20220420173925: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003732916904471771, 'time_algorithm_update': 0.008805164816783883, 'critic_loss': 25.569831915069045, 'actor_loss': 2.5236675753230937, 'time_step': 0.009255775931285836, 'td_error': 3.8094644812981193, 'init_value': -39.39924240112305, 'ave_value': -24.62566022033543} step=3420
2022-04-20 17:40.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.05 [info     ] TD3PlusBC_20220420173925: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00037804751368293983, 'time_algorithm_update': 0.008814499392146952, 'critic_loss': 29.383341566163892, 'actor_loss': 2.5229662956550105, 'time_step': 0.009264699896873787, 'td_error': 4.143550526676687, 'init_value': -41.35595703125, 'ave_value': -26.349622954919532} step=3762
2022-04-20 17:40.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.09 [info     ] TD3PlusBC_20220420173925: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00036971889741239494, 'time_algorithm_update': 0.0082790684281734, 'critic_loss': 33.53734576353553, 'actor_loss': 2.521654966978999, 'time_step': 0.008724308153342086, 'td_error': 4.369914343377792, 'init_value': -44.310028076171875, 'ave_value': -28.16663836394042} step=4104
2022-04-20 17:40.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.12 [info     ] TD3PlusBC_20220420173925: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003743255347536321, 'time_algorithm_update': 0.008870637207700495, 'critic_loss': 37.84061223303365, 'actor_loss': 2.5206666132162887, 'time_step': 0.009320242363109923, 'td_error': 4.763968185232061, 'init_value': -47.29429244995117, 'ave_value': -29.91422628437085} step=4446
2022-04-20 17:40.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.16 [info     ] TD3PlusBC_20220420173925: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00037538099010088293, 'time_algorithm_update': 0.008871318304050736, 'critic_loss': 42.223519141213934, 'actor_loss': 2.521272479442128, 'time_step': 0.009318665454262182, 'td_error': 5.2943537155169045, 'init_value': -49.73516082763672, 'ave_value': -31.572568364635966} step=4788
2022-04-20 17:40.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.20 [info     ] TD3PlusBC_20220420173925: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00037644829666405393, 'time_algorithm_update': 0.008928459987305758, 'critic_loss': 46.6844529603657, 'actor_loss': 2.5206419334077, 'time_step': 0.009380452814157943, 'td_error': 5.685444186128321, 'init_value': -52.33061599731445, 'ave_value': -33.458590141656465} step=5130
2022-04-20 17:40.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.23 [info     ] TD3PlusBC_20220420173925: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003767118119356925, 'time_algorithm_update': 0.008846614095899794, 'critic_loss': 51.158279106630914, 'actor_loss': 2.520380202789753, 'time_step': 0.009298479347898249, 'td_error': 6.012964788311922, 'init_value': -54.86260986328125, 'ave_value': -34.872573924033425} step=5472
2022-04-20 17:40.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.27 [info     ] TD3PlusBC_20220420173925: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003757532577068485, 'time_algorithm_update': 0.008515112581308822, 'critic_loss': 55.629328326175084, 'actor_loss': 2.519788368403563, 'time_step': 0.00896377800500881, 'td_error': 6.384888590658667, 'init_value': -57.15686798095703, 'ave_value': -36.32978736182708} step=5814
2022-04-20 17:40.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.31 [info     ] TD3PlusBC_20220420173925: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00037368417483324195, 'time_algorithm_update': 0.00881454470561959, 'critic_loss': 60.29453868754426, 'actor_loss': 2.520263289847569, 'time_step': 0.009264520734374285, 'td_error': 6.6671262263778965, 'init_value': -58.123138427734375, 'ave_value': -37.55779520474567} step=6156
2022-04-20 17:40.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.34 [info     ] TD3PlusBC_20220420173925: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003774479815834447, 'time_algorithm_update': 0.008774868926109626, 'critic_loss': 64.53634040118658, 'actor_loss': 2.519915823350873, 'time_step': 0.009229158100328948, 'td_error': 7.081709553377212, 'init_value': -61.000404357910156, 'ave_value': -39.06938133975013} step=6498
2022-04-20 17:40.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.38 [info     ] TD3PlusBC_20220420173925: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00037281136763723274, 'time_algorithm_update': 0.008641161416706285, 'critic_loss': 69.15271618491725, 'actor_loss': 2.522020754061247, 'time_step': 0.00909184363850376, 'td_error': 7.7464958776974555, 'init_value': -64.33240509033203, 'ave_value': -41.000272048250395} step=6840
2022-04-20 17:40.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.42 [info     ] TD3PlusBC_20220420173925: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00037969204417446204, 'time_algorithm_update': 0.008885307618749071, 'critic_loss': 73.849788342303, 'actor_loss': 2.519232341420581, 'time_step': 0.009342686474671838, 'td_error': 7.809361449656473, 'init_value': -64.76053619384766, 'ave_value': -41.71632817610318} step=7182
2022-04-20 17:40.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.45 [info     ] TD3PlusBC_20220420173925: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003702842701248258, 'time_algorithm_update': 0.008503939673217416, 'critic_loss': 78.17396440561753, 'actor_loss': 2.520014973411783, 'time_step': 0.008948426497609992, 'td_error': 8.259082889581801, 'init_value': -66.46440887451172, 'ave_value': -43.07264756025394} step=7524
2022-04-20 17:40.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.49 [info     ] TD3PlusBC_20220420173925: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00037193786331087523, 'time_algorithm_update': 0.008743861962480155, 'critic_loss': 82.32857011493884, 'actor_loss': 2.520775542621724, 'time_step': 0.009192804844058745, 'td_error': 8.53159448779066, 'init_value': -68.3598861694336, 'ave_value': -44.090946921779114} step=7866
2022-04-20 17:40.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.53 [info     ] TD3PlusBC_20220420173925: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003739218962819953, 'time_algorithm_update': 0.009022410850078738, 'critic_loss': 86.97830661974456, 'actor_loss': 2.5207397714692945, 'time_step': 0.009471622823971754, 'td_error': 8.840599340308515, 'init_value': -68.43293762207031, 'ave_value': -45.12364251208072} step=8208
2022-04-20 17:40.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.56 [info     ] TD3PlusBC_20220420173925: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003684222349646496, 'time_algorithm_update': 0.008643865585327148, 'critic_loss': 91.14561215897052, 'actor_loss': 2.5207410221211393, 'time_step': 0.009087304622806304, 'td_error': 9.32087370310156, 'init_value': -72.22957611083984, 'ave_value': -46.51186747963834} step=8550
2022-04-20 17:40.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.00 [info     ] TD3PlusBC_20220420173925: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003705240829646239, 'time_algorithm_update': 0.008732901679144965, 'critic_loss': 95.29841794465717, 'actor_loss': 2.5230177132009763, 'time_step': 0.009178386097065887, 'td_error': 9.367828646582351, 'init_value': -72.16484069824219, 'ave_value': -47.288976285924115} step=8892
2022-04-20 17:41.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.04 [info     ] TD3PlusBC_20220420173925: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00037118775105615806, 'time_algorithm_update': 0.008477906037492362, 'critic_loss': 99.77048136616311, 'actor_loss': 2.5225400394863553, 'time_step': 0.008916786539624308, 'td_error': 9.671784139490057, 'init_value': -73.43544006347656, 'ave_value': -48.269194702695096} step=9234
2022-04-20 17:41.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.07 [info     ] TD3PlusBC_20220420173925: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00037193995470192, 'time_algorithm_update': 0.008721767113222713, 'critic_loss': 103.1650228333055, 'actor_loss': 2.523521426128365, 'time_step': 0.009158115637929816, 'td_error': 9.233950115331423, 'init_value': -72.50655364990234, 'ave_value': -48.84520934815527} step=9576
2022-04-20 17:41.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.11 [info     ] TD3PlusBC_20220420173925: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00036996080164323775, 'time_algorithm_update': 0.00883389843834771, 'critic_loss': 107.1603002715529, 'actor_loss': 2.523384103998106, 'time_step': 0.00926944386889363, 'td_error': 10.06189543899203, 'init_value': -74.6739273071289, 'ave_value': -49.801690052792196} step=9918
2022-04-20 17:41.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.15 [info     ] TD3PlusBC_20220420173925: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00037321221758747656, 'time_algorithm_update': 0.008510462024755645, 'critic_loss': 110.77141617892082, 'actor_loss': 2.523914576971043, 'time_step': 0.008947532776503534, 'td_error': 10.018503382050946, 'init_value': -75.34122467041016, 'ave_value': -50.70323851415233} step=10260
2022-04-20 17:41.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.18 [info     ] TD3PlusBC_20220420173925: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037249905324121665, 'time_algorithm_update': 0.00892955517908286, 'critic_loss': 114.28737102753935, 'actor_loss': 2.523922032082987, 'time_step': 0.009370340938456574, 'td_error': 10.131689558770255, 'init_value': -76.46464538574219, 'ave_value': -51.78774631786949} step=10602
2022-04-20 17:41.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.22 [info     ] TD3PlusBC_20220420173925: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00037472499044317946, 'time_algorithm_update': 0.008409624211272301, 'critic_loss': 118.02483947932372, 'actor_loss': 2.5252069701925355, 'time_step': 0.008848245380914698, 'td_error': 10.817879844898675, 'init_value': -77.686279296875, 'ave_value': -52.57766008394024} step=10944
2022-04-20 17:41.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.26 [info     ] TD3PlusBC_20220420173925: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003786540170859175, 'time_algorithm_update': 0.008918290946915833, 'critic_loss': 120.9508583113464, 'actor_loss': 2.525844456856711, 'time_step': 0.009366612685354133, 'td_error': 10.19408990944463, 'init_value': -76.29859924316406, 'ave_value': -52.71938687147816} step=11286
2022-04-20 17:41.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.29 [info     ] TD3PlusBC_20220420173925: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00037539005279541016, 'time_algorithm_update': 0.008806379914980882, 'critic_loss': 124.25847364726819, 'actor_loss': 2.524383847476446, 'time_step': 0.009247389453196387, 'td_error': 10.802695948725788, 'init_value': -76.71109771728516, 'ave_value': -53.598267432094154} step=11628
2022-04-20 17:41.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.33 [info     ] TD3PlusBC_20220420173925: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003738912225466723, 'time_algorithm_update': 0.008500249762284128, 'critic_loss': 127.33979534126861, 'actor_loss': 2.526358357647009, 'time_step': 0.008936002937673826, 'td_error': 11.385042085921938, 'init_value': -79.26844787597656, 'ave_value': -54.546218878074235} step=11970
2022-04-20 17:41.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.37 [info     ] TD3PlusBC_20220420173925: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00037083360883924697, 'time_algorithm_update': 0.008822200591104072, 'critic_loss': 130.30108265570033, 'actor_loss': 2.526637403588546, 'time_step': 0.009258173362553468, 'td_error': 11.24719159788303, 'init_value': -78.01732635498047, 'ave_value': -54.8595814986532} step=12312
2022-04-20 17:41.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.40 [info     ] TD3PlusBC_20220420173925: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003745207312511422, 'time_algorithm_update': 0.00840802359999272, 'critic_loss': 132.96488368162636, 'actor_loss': 2.5250490074269254, 'time_step': 0.008849832746717665, 'td_error': 11.454320404591716, 'init_value': -78.06019592285156, 'ave_value': -55.378920096697136} step=12654
2022-04-20 17:41.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.44 [info     ] TD3PlusBC_20220420173925: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003735858794541387, 'time_algorithm_update': 0.00885167526222809, 'critic_loss': 135.65096932126764, 'actor_loss': 2.5266620415693137, 'time_step': 0.009288801087273492, 'td_error': 11.892179410468241, 'init_value': -80.3442611694336, 'ave_value': -56.2943486403011} step=12996
2022-04-20 17:41.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.47 [info     ] TD3PlusBC_20220420173925: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003740264658342328, 'time_algorithm_update': 0.008812477017006678, 'critic_loss': 138.33293919256556, 'actor_loss': 2.5268086829380683, 'time_step': 0.009253303209940592, 'td_error': 11.633204971589128, 'init_value': -80.83937072753906, 'ave_value': -56.73984687465234} step=13338
2022-04-20 17:41.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.51 [info     ] TD3PlusBC_20220420173925: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037216164215266354, 'time_algorithm_update': 0.008579093810410528, 'critic_loss': 140.77935518855938, 'actor_loss': 2.528134619283397, 'time_step': 0.009016783613907663, 'td_error': 12.218570039336623, 'init_value': -81.28814697265625, 'ave_value': -57.201237217262054} step=13680
2022-04-20 17:41.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.55 [info     ] TD3PlusBC_20220420173925: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003747835493924325, 'time_algorithm_update': 0.008708052467881587, 'critic_loss': 143.49327366254482, 'actor_loss': 2.5278330211750943, 'time_step': 0.009146043431689168, 'td_error': 12.507207179528976, 'init_value': -81.71647644042969, 'ave_value': -57.95963423092975} step=14022
2022-04-20 17:41.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.58 [info     ] TD3PlusBC_20220420173925: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037469152818646345, 'time_algorithm_update': 0.008503846954881098, 'critic_loss': 145.88665483708968, 'actor_loss': 2.5273787905598244, 'time_step': 0.008940708567524514, 'td_error': 12.499647851086094, 'init_value': -81.9170913696289, 'ave_value': -58.308399157734705} step=14364
2022-04-20 17:41.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.02 [info     ] TD3PlusBC_20220420173925: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00038158614733065777, 'time_algorithm_update': 0.008920220603719789, 'critic_loss': 147.7450463813648, 'actor_loss': 2.5284977363564116, 'time_step': 0.009364373502675553, 'td_error': 12.276005692809093, 'init_value': -79.33421325683594, 'ave_value': -58.30613146625576} step=14706
2022-04-20 17:42.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.06 [info     ] TD3PlusBC_20220420173925: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003739023766322443, 'time_algorithm_update': 0.008887705747147052, 'critic_loss': 149.75622641133984, 'actor_loss': 2.5288435227689687, 'time_step': 0.009325445744029263, 'td_error': 12.266038390632602, 'init_value': -79.41200256347656, 'ave_value': -58.70575978423922} step=15048
2022-04-20 17:42.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.09 [info     ] TD3PlusBC_20220420173925: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003674183672631693, 'time_algorithm_update': 0.00854595014226367, 'critic_loss': 151.6206044648823, 'actor_loss': 2.529007687206157, 'time_step': 0.008976616357502184, 'td_error': 12.601551724563915, 'init_value': -80.87406158447266, 'ave_value': -59.174433391715425} step=15390
2022-04-20 17:42.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.13 [info     ] TD3PlusBC_20220420173925: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003760795147098296, 'time_algorithm_update': 0.00893971445964791, 'critic_loss': 153.53074297988624, 'actor_loss': 2.5281465179041813, 'time_step': 0.009379592555308203, 'td_error': 13.315801466033694, 'init_value': -82.56128692626953, 'ave_value': -60.231206210326775} step=15732
2022-04-20 17:42.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.17 [info     ] TD3PlusBC_20220420173925: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.000377839071708813, 'time_algorithm_update': 0.008639962352507295, 'critic_loss': 155.23549988952993, 'actor_loss': 2.5309824608919915, 'time_step': 0.009081473127443191, 'td_error': 12.812350484634107, 'init_value': -80.67667388916016, 'ave_value': -60.046946195511666} step=16074
2022-04-20 17:42.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.20 [info     ] TD3PlusBC_20220420173925: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00036641449956168907, 'time_algorithm_update': 0.008817811458431489, 'critic_loss': 156.60782576443856, 'actor_loss': 2.5296663694214403, 'time_step': 0.009248038481550607, 'td_error': 13.116132847697298, 'init_value': -80.75396728515625, 'ave_value': -60.33270208980905} step=16416
2022-04-20 17:42.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.24 [info     ] TD3PlusBC_20220420173925: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003767975589685273, 'time_algorithm_update': 0.00877381137937133, 'critic_loss': 158.07203404387536, 'actor_loss': 2.531375901740894, 'time_step': 0.009219205867477327, 'td_error': 12.536770903001013, 'init_value': -79.89631652832031, 'ave_value': -60.76310488423047} step=16758
2022-04-20 17:42.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.28 [info     ] TD3PlusBC_20220420173925: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037110130689297505, 'time_algorithm_update': 0.008495697501110054, 'critic_loss': 159.0918928893686, 'actor_loss': 2.5292788137469375, 'time_step': 0.008928812038131624, 'td_error': 13.431338933915464, 'init_value': -81.69210052490234, 'ave_value': -61.22626216762087} step=17100
2022-04-20 17:42.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420173925/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:42.29 [info     ] FQE_20220420174228: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001620528209640319, 'time_algorithm_update': 0.005135250378804034, 'loss': 0.008192638454240668, 'time_step': 0.005370645637971809, 'init_value': -0.5351865291595459, 'ave_value': -0.5141844616414191, 'soft_opc': nan} step=166




2022-04-20 17:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.30 [info     ] FQE_20220420174228: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016167077673486917, 'time_algorithm_update': 0.005052272095737687, 'loss': 0.005887585819743753, 'time_step': 0.005284313695976533, 'init_value': -0.7117432355880737, 'ave_value': -0.6361162493022176, 'soft_opc': nan} step=332




2022-04-20 17:42.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.31 [info     ] FQE_20220420174228: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016385388661579913, 'time_algorithm_update': 0.0050579725977886155, 'loss': 0.005148557128373489, 'time_step': 0.00529355600655797, 'init_value': -0.7717074155807495, 'ave_value': -0.6624871406804871, 'soft_opc': nan} step=498




2022-04-20 17:42.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.32 [info     ] FQE_20220420174228: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016327363898955197, 'time_algorithm_update': 0.005033705607954278, 'loss': 0.005275248657878354, 'time_step': 0.0052722462688583925, 'init_value': -0.8696492314338684, 'ave_value': -0.7053711991320859, 'soft_opc': nan} step=664




2022-04-20 17:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.33 [info     ] FQE_20220420174228: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001657454364271049, 'time_algorithm_update': 0.0049931801945330145, 'loss': 0.005094237622793719, 'time_step': 0.005236466246915151, 'init_value': -0.9453028440475464, 'ave_value': -0.7285921821991602, 'soft_opc': nan} step=830




2022-04-20 17:42.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.34 [info     ] FQE_20220420174228: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016745889043233482, 'time_algorithm_update': 0.0050340086580759075, 'loss': 0.004965372920788106, 'time_step': 0.005276721644114299, 'init_value': -1.047980785369873, 'ave_value': -0.806465507412816, 'soft_opc': nan} step=996




2022-04-20 17:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.34 [info     ] FQE_20220420174228: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015813758574336408, 'time_algorithm_update': 0.004392787634608257, 'loss': 0.004875978668141796, 'time_step': 0.004623357071933976, 'init_value': -1.0827052593231201, 'ave_value': -0.7869646404576194, 'soft_opc': nan} step=1162




2022-04-20 17:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.35 [info     ] FQE_20220420174228: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016098855489707855, 'time_algorithm_update': 0.0049237314477024305, 'loss': 0.004784715223036347, 'time_step': 0.005159278950059271, 'init_value': -1.1615285873413086, 'ave_value': -0.8214927568763226, 'soft_opc': nan} step=1328




2022-04-20 17:42.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.36 [info     ] FQE_20220420174228: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016309123441397426, 'time_algorithm_update': 0.0050598153148789, 'loss': 0.004720951967693147, 'time_step': 0.005300055067223239, 'init_value': -1.2247501611709595, 'ave_value': -0.8378149983783563, 'soft_opc': nan} step=1494




2022-04-20 17:42.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.37 [info     ] FQE_20220420174228: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001619063228009695, 'time_algorithm_update': 0.004966697060918233, 'loss': 0.004904861461668533, 'time_step': 0.0052036406045936675, 'init_value': -1.2970354557037354, 'ave_value': -0.8582478168177176, 'soft_opc': nan} step=1660




2022-04-20 17:42.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.38 [info     ] FQE_20220420174228: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016577559781361776, 'time_algorithm_update': 0.005100266042962132, 'loss': 0.004952375815637095, 'time_step': 0.005336992711905974, 'init_value': -1.4244332313537598, 'ave_value': -0.93793220830796, 'soft_opc': nan} step=1826




2022-04-20 17:42.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.39 [info     ] FQE_20220420174228: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001594172902854092, 'time_algorithm_update': 0.004964393305491252, 'loss': 0.005081125413746494, 'time_step': 0.005200296999460243, 'init_value': -1.4994386434555054, 'ave_value': -0.9693114420852146, 'soft_opc': nan} step=1992




2022-04-20 17:42.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.40 [info     ] FQE_20220420174228: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016647074595991387, 'time_algorithm_update': 0.005044223314308259, 'loss': 0.005523968290212194, 'time_step': 0.005287862685789545, 'init_value': -1.607546091079712, 'ave_value': -1.0236002135048579, 'soft_opc': nan} step=2158




2022-04-20 17:42.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.41 [info     ] FQE_20220420174228: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016595656613269485, 'time_algorithm_update': 0.005069443978459002, 'loss': 0.005773591706585364, 'time_step': 0.005307197570800781, 'init_value': -1.6700103282928467, 'ave_value': -1.0303567921759578, 'soft_opc': nan} step=2324




2022-04-20 17:42.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.42 [info     ] FQE_20220420174228: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016417417181543558, 'time_algorithm_update': 0.004959122244134007, 'loss': 0.006094435561611037, 'time_step': 0.005198780312595597, 'init_value': -1.7605488300323486, 'ave_value': -1.0798553332183007, 'soft_opc': nan} step=2490




2022-04-20 17:42.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.43 [info     ] FQE_20220420174228: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016616338706878294, 'time_algorithm_update': 0.0048188717968492625, 'loss': 0.006964931200000356, 'time_step': 0.005060757499143302, 'init_value': -1.8617199659347534, 'ave_value': -1.1479163561419055, 'soft_opc': nan} step=2656




2022-04-20 17:42.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.44 [info     ] FQE_20220420174228: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015815482082137143, 'time_algorithm_update': 0.0042533989412238795, 'loss': 0.006936015153769404, 'time_step': 0.004486135689609022, 'init_value': -1.9895238876342773, 'ave_value': -1.2092749551005728, 'soft_opc': nan} step=2822




2022-04-20 17:42.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.45 [info     ] FQE_20220420174228: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016926426485360386, 'time_algorithm_update': 0.005120505769568753, 'loss': 0.00771383229918015, 'time_step': 0.005363984280321972, 'init_value': -2.02447509765625, 'ave_value': -1.2086127558270017, 'soft_opc': nan} step=2988




2022-04-20 17:42.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.46 [info     ] FQE_20220420174228: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016775475927146086, 'time_algorithm_update': 0.005037645259535456, 'loss': 0.008371737780027002, 'time_step': 0.00527878985347518, 'init_value': -2.1639556884765625, 'ave_value': -1.3051201085652318, 'soft_opc': nan} step=3154




2022-04-20 17:42.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.47 [info     ] FQE_20220420174228: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001648219234972115, 'time_algorithm_update': 0.0050243311617747845, 'loss': 0.009058974411882774, 'time_step': 0.005266071802162263, 'init_value': -2.2445056438446045, 'ave_value': -1.3722156309732445, 'soft_opc': nan} step=3320




2022-04-20 17:42.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.48 [info     ] FQE_20220420174228: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016417991684143803, 'time_algorithm_update': 0.005064735929649997, 'loss': 0.009895016441362944, 'time_step': 0.0053032449929110975, 'init_value': -2.280616521835327, 'ave_value': -1.3899825150096738, 'soft_opc': nan} step=3486




2022-04-20 17:42.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.49 [info     ] FQE_20220420174228: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00017127789646746163, 'time_algorithm_update': 0.004943755735833961, 'loss': 0.010334989421366972, 'time_step': 0.005189318254769567, 'init_value': -2.4420254230499268, 'ave_value': -1.4916704146376065, 'soft_opc': nan} step=3652




2022-04-20 17:42.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.50 [info     ] FQE_20220420174228: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016322624252503178, 'time_algorithm_update': 0.005030525736061923, 'loss': 0.010841054235254022, 'time_step': 0.00526911666594356, 'init_value': -2.537977457046509, 'ave_value': -1.5666871326228788, 'soft_opc': nan} step=3818




2022-04-20 17:42.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.50 [info     ] FQE_20220420174228: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017000393695141896, 'time_algorithm_update': 0.004963611981954919, 'loss': 0.01149219379224534, 'time_step': 0.005211318831845939, 'init_value': -2.5208046436309814, 'ave_value': -1.5311643412034657, 'soft_opc': nan} step=3984




2022-04-20 17:42.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.51 [info     ] FQE_20220420174228: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016605854034423828, 'time_algorithm_update': 0.005042086164635348, 'loss': 0.011856462049062353, 'time_step': 0.00528280419039439, 'init_value': -2.6202433109283447, 'ave_value': -1.5849957953870029, 'soft_opc': nan} step=4150




2022-04-20 17:42.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.52 [info     ] FQE_20220420174228: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016015839863972492, 'time_algorithm_update': 0.004072203693619694, 'loss': 0.012225908535134316, 'time_step': 0.004305194659405444, 'init_value': -2.7185654640197754, 'ave_value': -1.6869668856464528, 'soft_opc': nan} step=4316




2022-04-20 17:42.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.53 [info     ] FQE_20220420174228: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016733824488628343, 'time_algorithm_update': 0.004968443548822978, 'loss': 0.013357140257791897, 'time_step': 0.005211078977010336, 'init_value': -2.7518513202667236, 'ave_value': -1.7057075504079326, 'soft_opc': nan} step=4482




2022-04-20 17:42.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.54 [info     ] FQE_20220420174228: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001699364328958902, 'time_algorithm_update': 0.005103424371006977, 'loss': 0.01377896644478013, 'time_step': 0.005346391574445978, 'init_value': -2.847672462463379, 'ave_value': -1.7818947278130968, 'soft_opc': nan} step=4648




2022-04-20 17:42.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.55 [info     ] FQE_20220420174228: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001623903412416757, 'time_algorithm_update': 0.005028611206146608, 'loss': 0.014175527494159493, 'time_step': 0.005266091909753271, 'init_value': -2.8805856704711914, 'ave_value': -1.7910267183070516, 'soft_opc': nan} step=4814




2022-04-20 17:42.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.56 [info     ] FQE_20220420174228: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001639213906713279, 'time_algorithm_update': 0.004991155072867152, 'loss': 0.015299681723482502, 'time_step': 0.0052286745553993316, 'init_value': -2.915724992752075, 'ave_value': -1.8017270765930147, 'soft_opc': nan} step=4980




2022-04-20 17:42.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.57 [info     ] FQE_20220420174228: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016703375850815372, 'time_algorithm_update': 0.00513519292854401, 'loss': 0.015547313551412872, 'time_step': 0.005377791014062353, 'init_value': -3.0485522747039795, 'ave_value': -1.9059611888887646, 'soft_opc': nan} step=5146




2022-04-20 17:42.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.58 [info     ] FQE_20220420174228: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016355370900717126, 'time_algorithm_update': 0.004944919103599456, 'loss': 0.01648538607784199, 'time_step': 0.005184476634106004, 'init_value': -3.1039419174194336, 'ave_value': -1.9416382948020556, 'soft_opc': nan} step=5312




2022-04-20 17:42.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:42.59 [info     ] FQE_20220420174228: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016574112765760306, 'time_algorithm_update': 0.004954702882881624, 'loss': 0.01709274169974904, 'time_step': 0.005194550537201296, 'init_value': -3.146760940551758, 'ave_value': -1.971555844432599, 'soft_opc': nan} step=5478




2022-04-20 17:42.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.00 [info     ] FQE_20220420174228: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016875439379588668, 'time_algorithm_update': 0.00512582566364702, 'loss': 0.017884747398714256, 'time_step': 0.005367474383618458, 'init_value': -3.189469814300537, 'ave_value': -2.0018859470884007, 'soft_opc': nan} step=5644




2022-04-20 17:43.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.01 [info     ] FQE_20220420174228: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016126144363219478, 'time_algorithm_update': 0.004221241158175181, 'loss': 0.01784147067536903, 'time_step': 0.004456504281744899, 'init_value': -3.21193790435791, 'ave_value': -2.028746745134662, 'soft_opc': nan} step=5810




2022-04-20 17:43.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.02 [info     ] FQE_20220420174228: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001611666507031544, 'time_algorithm_update': 0.005192996507667634, 'loss': 0.018970942755190498, 'time_step': 0.005430424069783774, 'init_value': -3.2181811332702637, 'ave_value': -2.0160237062856687, 'soft_opc': nan} step=5976




2022-04-20 17:43.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.03 [info     ] FQE_20220420174228: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001689870673489858, 'time_algorithm_update': 0.00509746821529894, 'loss': 0.018908212030817002, 'time_step': 0.005341055881546204, 'init_value': -3.218822956085205, 'ave_value': -2.030092944201451, 'soft_opc': nan} step=6142




2022-04-20 17:43.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.04 [info     ] FQE_20220420174228: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001662553074848221, 'time_algorithm_update': 0.005094024072210473, 'loss': 0.018639187529088413, 'time_step': 0.00533315790704934, 'init_value': -3.3081610202789307, 'ave_value': -2.0901592393380564, 'soft_opc': nan} step=6308




2022-04-20 17:43.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.05 [info     ] FQE_20220420174228: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001671084438461855, 'time_algorithm_update': 0.004943630781518407, 'loss': 0.019563849164728433, 'time_step': 0.005186326532478792, 'init_value': -3.395341396331787, 'ave_value': -2.1569304762257113, 'soft_opc': nan} step=6474




2022-04-20 17:43.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.06 [info     ] FQE_20220420174228: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001654036073799593, 'time_algorithm_update': 0.0051112261163183004, 'loss': 0.020209348482831596, 'time_step': 0.005348551704222898, 'init_value': -3.422499179840088, 'ave_value': -2.169281466734839, 'soft_opc': nan} step=6640




2022-04-20 17:43.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.07 [info     ] FQE_20220420174228: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016873715871787933, 'time_algorithm_update': 0.005087323935635118, 'loss': 0.021213349728446722, 'time_step': 0.005328599228916398, 'init_value': -3.4789490699768066, 'ave_value': -2.191231329547795, 'soft_opc': nan} step=6806




2022-04-20 17:43.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.08 [info     ] FQE_20220420174228: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001667177820780191, 'time_algorithm_update': 0.004996818232249065, 'loss': 0.021704863703596484, 'time_step': 0.005236798022166792, 'init_value': -3.592639446258545, 'ave_value': -2.2918630125286343, 'soft_opc': nan} step=6972




2022-04-20 17:43.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.08 [info     ] FQE_20220420174228: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001627695129578372, 'time_algorithm_update': 0.005001366856586502, 'loss': 0.022670803228337092, 'time_step': 0.00523888203034918, 'init_value': -3.589846134185791, 'ave_value': -2.262530928601821, 'soft_opc': nan} step=7138




2022-04-20 17:43.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.09 [info     ] FQE_20220420174228: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016340146581810642, 'time_algorithm_update': 0.004680581839687853, 'loss': 0.023038050314798636, 'time_step': 0.004915572074522455, 'init_value': -3.66538405418396, 'ave_value': -2.3554981618582667, 'soft_opc': nan} step=7304




2022-04-20 17:43.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.10 [info     ] FQE_20220420174228: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001632765115025532, 'time_algorithm_update': 0.004670404526124518, 'loss': 0.023372918617748935, 'time_step': 0.004904341984944171, 'init_value': -3.671715259552002, 'ave_value': -2.3652675913059498, 'soft_opc': nan} step=7470




2022-04-20 17:43.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.11 [info     ] FQE_20220420174228: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016873859497437994, 'time_algorithm_update': 0.005137782499014613, 'loss': 0.024196392775889128, 'time_step': 0.0053804768137184975, 'init_value': -3.6984059810638428, 'ave_value': -2.3650167902847667, 'soft_opc': nan} step=7636




2022-04-20 17:43.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.12 [info     ] FQE_20220420174228: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016445855060255672, 'time_algorithm_update': 0.0051326564995639295, 'loss': 0.023758942708064498, 'time_step': 0.005372453884906079, 'init_value': -3.693702459335327, 'ave_value': -2.3894952711221333, 'soft_opc': nan} step=7802




2022-04-20 17:43.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.13 [info     ] FQE_20220420174228: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001645160008625812, 'time_algorithm_update': 0.0050686870712831796, 'loss': 0.024221084157779186, 'time_step': 0.005306377468338932, 'init_value': -3.7148284912109375, 'ave_value': -2.410066549526947, 'soft_opc': nan} step=7968




2022-04-20 17:43.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.14 [info     ] FQE_20220420174228: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016987898263586573, 'time_algorithm_update': 0.00502727836011404, 'loss': 0.025308900221015316, 'time_step': 0.00527252203010651, 'init_value': -3.8515264987945557, 'ave_value': -2.517879489211886, 'soft_opc': nan} step=8134




2022-04-20 17:43.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.15 [info     ] FQE_20220420174228: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016844559864825513, 'time_algorithm_update': 0.005092100924756153, 'loss': 0.025855259962116246, 'time_step': 0.005335177283689201, 'init_value': -3.9567017555236816, 'ave_value': -2.6650079334238628, 'soft_opc': nan} step=8300




2022-04-20 17:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174228/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:43.15 [info     ] Directory is created at d3rlpy_logs/FQE_20220420174315
2022-04-20 17:43.15 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:43.15 [debug    ] Building models...
2022-04-20 17:43.15 [debug    ] Models have been built.
2022-04-20 17:43.15 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420174315/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:43.17 [info     ] FQE_20220420174315: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001678397489148517, 'time_algorithm_update': 0.0049761873345042385, 'loss': 0.030853531001200682, 'time_step': 0.005217254854911982, 'init_value': -1.2035130262374878, 'ave_value': -1.1426788855243373, 'soft_opc': nan} step=344




2022-04-20 17:43.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.19 [info     ] FQE_20220420174315: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016254533168881438, 'time_algorithm_update': 0.0045963338641233226, 'loss': 0.026694113358876906, 'time_step': 0.004831725774809371, 'init_value': -2.0318071842193604, 'ave_value': -1.9304611298817773, 'soft_opc': nan} step=688




2022-04-20 17:43.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.21 [info     ] FQE_20220420174315: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.000171053548191869, 'time_algorithm_update': 0.00503854280294374, 'loss': 0.03186123083930376, 'time_step': 0.005286744860715644, 'init_value': -3.1260576248168945, 'ave_value': -2.984345604989443, 'soft_opc': nan} step=1032




2022-04-20 17:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.23 [info     ] FQE_20220420174315: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017434705135434172, 'time_algorithm_update': 0.0050432966199032096, 'loss': 0.03669587782378374, 'time_step': 0.005294037419696187, 'init_value': -3.8139610290527344, 'ave_value': -3.661417096927091, 'soft_opc': nan} step=1376




2022-04-20 17:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.25 [info     ] FQE_20220420174315: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017020314238792243, 'time_algorithm_update': 0.005110542441523353, 'loss': 0.04553339694799899, 'time_step': 0.0053553692130155345, 'init_value': -4.717260360717773, 'ave_value': -4.565608887313991, 'soft_opc': nan} step=1720




2022-04-20 17:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.27 [info     ] FQE_20220420174315: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017152276150015898, 'time_algorithm_update': 0.004789265089256819, 'loss': 0.056975263669047246, 'time_step': 0.005037225263063298, 'init_value': -5.383467674255371, 'ave_value': -5.290687209945005, 'soft_opc': nan} step=2064




2022-04-20 17:43.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.29 [info     ] FQE_20220420174315: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017351258632748625, 'time_algorithm_update': 0.005050579475802045, 'loss': 0.07093531042770591, 'time_step': 0.005300270263538804, 'init_value': -6.061912536621094, 'ave_value': -6.109686155442719, 'soft_opc': nan} step=2408




2022-04-20 17:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.31 [info     ] FQE_20220420174315: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001704561155895854, 'time_algorithm_update': 0.0050430180028427475, 'loss': 0.0908067810517021, 'time_step': 0.0052901222262271615, 'init_value': -6.464561462402344, 'ave_value': -6.722720477005114, 'soft_opc': nan} step=2752




2022-04-20 17:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.33 [info     ] FQE_20220420174315: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017271138900934265, 'time_algorithm_update': 0.005047894494478093, 'loss': 0.11120339370939102, 'time_step': 0.005297312902849774, 'init_value': -6.880209922790527, 'ave_value': -7.3292624396112585, 'soft_opc': nan} step=3096




2022-04-20 17:43.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.35 [info     ] FQE_20220420174315: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017287772755290187, 'time_algorithm_update': 0.0050270072249479075, 'loss': 0.13427429187631365, 'time_step': 0.0052739464959432915, 'init_value': -7.075469970703125, 'ave_value': -7.846981155066877, 'soft_opc': nan} step=3440




2022-04-20 17:43.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.37 [info     ] FQE_20220420174315: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016686528228050055, 'time_algorithm_update': 0.004608069048371426, 'loss': 0.1524247262616057, 'time_step': 0.004848127448281577, 'init_value': -7.493717193603516, 'ave_value': -8.590332263271819, 'soft_opc': nan} step=3784




2022-04-20 17:43.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.39 [info     ] FQE_20220420174315: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017421328744222952, 'time_algorithm_update': 0.005013106867324474, 'loss': 0.17449158804794385, 'time_step': 0.005261909130007722, 'init_value': -7.969658851623535, 'ave_value': -9.287640847332844, 'soft_opc': nan} step=4128




2022-04-20 17:43.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.41 [info     ] FQE_20220420174315: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001703590847725092, 'time_algorithm_update': 0.005016108584958453, 'loss': 0.19900748027531906, 'time_step': 0.005263869845589926, 'init_value': -8.433703422546387, 'ave_value': -9.842668615026517, 'soft_opc': nan} step=4472




2022-04-20 17:43.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.43 [info     ] FQE_20220420174315: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001712586990622587, 'time_algorithm_update': 0.005094262749649758, 'loss': 0.22721845967435214, 'time_step': 0.005342406588931417, 'init_value': -8.96817398071289, 'ave_value': -10.458465730580123, 'soft_opc': nan} step=4816




2022-04-20 17:43.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.45 [info     ] FQE_20220420174315: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016886758249859478, 'time_algorithm_update': 0.005025063836297324, 'loss': 0.2508794683529887, 'time_step': 0.005272193010463271, 'init_value': -9.517866134643555, 'ave_value': -10.965093595504358, 'soft_opc': nan} step=5160




2022-04-20 17:43.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.46 [info     ] FQE_20220420174315: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001657369524933571, 'time_algorithm_update': 0.003275772166806598, 'loss': 0.27720610990167355, 'time_step': 0.0035178391046302264, 'init_value': -10.227587699890137, 'ave_value': -11.57657934395546, 'soft_opc': nan} step=5504




2022-04-20 17:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.47 [info     ] FQE_20220420174315: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001666691414145536, 'time_algorithm_update': 0.003506835809973783, 'loss': 0.30299439348367063, 'time_step': 0.0037479726381080096, 'init_value': -11.05693244934082, 'ave_value': -12.14928468529739, 'soft_opc': nan} step=5848




2022-04-20 17:43.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.49 [info     ] FQE_20220420174315: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.000165508930073228, 'time_algorithm_update': 0.0034803616446118023, 'loss': 0.33381445541284804, 'time_step': 0.0037196417187535485, 'init_value': -11.697183609008789, 'ave_value': -12.724124080452833, 'soft_opc': nan} step=6192




2022-04-20 17:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.50 [info     ] FQE_20220420174315: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016348375830539438, 'time_algorithm_update': 0.003520156755003818, 'loss': 0.3582227155349629, 'time_step': 0.0037568066009255343, 'init_value': -11.9966402053833, 'ave_value': -12.85614903690713, 'soft_opc': nan} step=6536




2022-04-20 17:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.51 [info     ] FQE_20220420174315: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016376791998397474, 'time_algorithm_update': 0.0034261581509612326, 'loss': 0.37206038502369854, 'time_step': 0.0036641276159951855, 'init_value': -12.42080307006836, 'ave_value': -13.325792535868482, 'soft_opc': nan} step=6880




2022-04-20 17:43.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.53 [info     ] FQE_20220420174315: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016290503878926122, 'time_algorithm_update': 0.003444262715273125, 'loss': 0.39228631139104797, 'time_step': 0.0036821607933488, 'init_value': -12.79815673828125, 'ave_value': -13.640542637639015, 'soft_opc': nan} step=7224




2022-04-20 17:43.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.54 [info     ] FQE_20220420174315: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016570645709370458, 'time_algorithm_update': 0.003486932017082392, 'loss': 0.4003489573139611, 'time_step': 0.003726099812707236, 'init_value': -12.789691925048828, 'ave_value': -13.704560088215246, 'soft_opc': nan} step=7568




2022-04-20 17:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.56 [info     ] FQE_20220420174315: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016633022663205168, 'time_algorithm_update': 0.00347049776897874, 'loss': 0.411875830768326, 'time_step': 0.0037131503570911497, 'init_value': -13.064311981201172, 'ave_value': -14.015426516814811, 'soft_opc': nan} step=7912




2022-04-20 17:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.57 [info     ] FQE_20220420174315: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016833183377288108, 'time_algorithm_update': 0.0034576585126477617, 'loss': 0.41927780247791563, 'time_step': 0.003700024859849797, 'init_value': -13.10092544555664, 'ave_value': -13.991018807121158, 'soft_opc': nan} step=8256




2022-04-20 17:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:43.58 [info     ] FQE_20220420174315: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016550269237784453, 'time_algorithm_update': 0.0034728999747786413, 'loss': 0.4299565066994969, 'time_step': 0.003713982049808946, 'init_value': -13.253007888793945, 'ave_value': -14.13381114113606, 'soft_opc': nan} step=8600




2022-04-20 17:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.00 [info     ] FQE_20220420174315: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.000164750010468239, 'time_algorithm_update': 0.0034523232038630998, 'loss': 0.4441689323027467, 'time_step': 0.0036908776261085686, 'init_value': -13.514533996582031, 'ave_value': -14.37604086629392, 'soft_opc': nan} step=8944




2022-04-20 17:44.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.01 [info     ] FQE_20220420174315: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016390445620514626, 'time_algorithm_update': 0.0035116776477458864, 'loss': 0.45325338906543544, 'time_step': 0.0037501308807106905, 'init_value': -13.741523742675781, 'ave_value': -14.680422961846128, 'soft_opc': nan} step=9288




2022-04-20 17:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.03 [info     ] FQE_20220420174315: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016571546709814736, 'time_algorithm_update': 0.0035340924595677575, 'loss': 0.4650999537430877, 'time_step': 0.0037752888923467593, 'init_value': -13.738130569458008, 'ave_value': -15.014276433924868, 'soft_opc': nan} step=9632




2022-04-20 17:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.04 [info     ] FQE_20220420174315: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001668070637902548, 'time_algorithm_update': 0.0035120865633321363, 'loss': 0.4655650898402687, 'time_step': 0.0037535796331804854, 'init_value': -13.297518730163574, 'ave_value': -14.737677600977287, 'soft_opc': nan} step=9976




2022-04-20 17:44.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.05 [info     ] FQE_20220420174315: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001687206501184508, 'time_algorithm_update': 0.003484308026557745, 'loss': 0.4718205578906765, 'time_step': 0.003729418959728507, 'init_value': -13.008415222167969, 'ave_value': -14.519183295339278, 'soft_opc': nan} step=10320




2022-04-20 17:44.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.07 [info     ] FQE_20220420174315: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016311365504597509, 'time_algorithm_update': 0.003421311461648276, 'loss': 0.4720686470310009, 'time_step': 0.003657677838968676, 'init_value': -13.161275863647461, 'ave_value': -14.796506081977943, 'soft_opc': nan} step=10664




2022-04-20 17:44.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.08 [info     ] FQE_20220420174315: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016684726227161496, 'time_algorithm_update': 0.003495503303616546, 'loss': 0.47193838673275573, 'time_step': 0.0037369118180385855, 'init_value': -13.051090240478516, 'ave_value': -14.99274098486346, 'soft_opc': nan} step=11008




2022-04-20 17:44.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.10 [info     ] FQE_20220420174315: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000165578237799711, 'time_algorithm_update': 0.0035233393658039183, 'loss': 0.4789448039238016, 'time_step': 0.003761703191801559, 'init_value': -12.703960418701172, 'ave_value': -14.969118561980792, 'soft_opc': nan} step=11352




2022-04-20 17:44.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.11 [info     ] FQE_20220420174315: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016865550085555677, 'time_algorithm_update': 0.003457564254139745, 'loss': 0.4688976095058024, 'time_step': 0.0037009923957114998, 'init_value': -12.76151180267334, 'ave_value': -15.32383777907051, 'soft_opc': nan} step=11696




2022-04-20 17:44.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.12 [info     ] FQE_20220420174315: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016454139421152514, 'time_algorithm_update': 0.003511439229166785, 'loss': 0.47369359254966986, 'time_step': 0.0037506250448005145, 'init_value': -12.388090133666992, 'ave_value': -15.054739523783896, 'soft_opc': nan} step=12040




2022-04-20 17:44.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.14 [info     ] FQE_20220420174315: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016749113105064215, 'time_algorithm_update': 0.0034949945849041607, 'loss': 0.479801184378651, 'time_step': 0.0037398248217826668, 'init_value': -12.7120361328125, 'ave_value': -15.586103982082358, 'soft_opc': nan} step=12384




2022-04-20 17:44.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.15 [info     ] FQE_20220420174315: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016897362332011378, 'time_algorithm_update': 0.003510385751724243, 'loss': 0.48219348492945524, 'time_step': 0.0037558432235274206, 'init_value': -12.691169738769531, 'ave_value': -15.832847334399215, 'soft_opc': nan} step=12728




2022-04-20 17:44.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.17 [info     ] FQE_20220420174315: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016773370809333268, 'time_algorithm_update': 0.003507660571918931, 'loss': 0.48448449134458466, 'time_step': 0.003751335448996965, 'init_value': -12.73764419555664, 'ave_value': -16.004121147665018, 'soft_opc': nan} step=13072




2022-04-20 17:44.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.18 [info     ] FQE_20220420174315: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016611537267995435, 'time_algorithm_update': 0.0034966517326443696, 'loss': 0.49560724038002624, 'time_step': 0.003737505785254545, 'init_value': -12.66206169128418, 'ave_value': -16.242422789382235, 'soft_opc': nan} step=13416




2022-04-20 17:44.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.19 [info     ] FQE_20220420174315: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016536962154299715, 'time_algorithm_update': 0.003164762674376022, 'loss': 0.5029479603683793, 'time_step': 0.0034035901690638343, 'init_value': -12.65225601196289, 'ave_value': -16.444984683702355, 'soft_opc': nan} step=13760




2022-04-20 17:44.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.21 [info     ] FQE_20220420174315: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016590051872785702, 'time_algorithm_update': 0.0035127297390338988, 'loss': 0.5046272948979899, 'time_step': 0.0037531922029894454, 'init_value': -12.770530700683594, 'ave_value': -16.59135393524828, 'soft_opc': nan} step=14104




2022-04-20 17:44.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.22 [info     ] FQE_20220420174315: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001662754735281301, 'time_algorithm_update': 0.0035196327885916065, 'loss': 0.5052794762832914, 'time_step': 0.003759342570637548, 'init_value': -12.725801467895508, 'ave_value': -16.721982491521484, 'soft_opc': nan} step=14448




2022-04-20 17:44.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.24 [info     ] FQE_20220420174315: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016476872355438942, 'time_algorithm_update': 0.0035715983357540396, 'loss': 0.506267435166465, 'time_step': 0.0038099510725154435, 'init_value': -12.532273292541504, 'ave_value': -16.84652928803955, 'soft_opc': nan} step=14792




2022-04-20 17:44.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.25 [info     ] FQE_20220420174315: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001649475374887156, 'time_algorithm_update': 0.003557088763214821, 'loss': 0.5027056154849138, 'time_step': 0.0037996852120687793, 'init_value': -12.912793159484863, 'ave_value': -17.45692959213042, 'soft_opc': nan} step=15136




2022-04-20 17:44.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.26 [info     ] FQE_20220420174315: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016560665396756904, 'time_algorithm_update': 0.00353912212127863, 'loss': 0.5110744786351312, 'time_step': 0.003781087176744328, 'init_value': -12.734254837036133, 'ave_value': -17.469947998633458, 'soft_opc': nan} step=15480




2022-04-20 17:44.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.28 [info     ] FQE_20220420174315: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016681953918102176, 'time_algorithm_update': 0.003583747980206512, 'loss': 0.5180881440444568, 'time_step': 0.003828021676041359, 'init_value': -13.018668174743652, 'ave_value': -17.92316748144877, 'soft_opc': nan} step=15824




2022-04-20 17:44.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.29 [info     ] FQE_20220420174315: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016631359277769576, 'time_algorithm_update': 0.0035766986913459246, 'loss': 0.5198474680344293, 'time_step': 0.0038166829319887384, 'init_value': -12.888724327087402, 'ave_value': -17.857509826217687, 'soft_opc': nan} step=16168




2022-04-20 17:44.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.31 [info     ] FQE_20220420174315: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001668250837991404, 'time_algorithm_update': 0.003527391095494115, 'loss': 0.5164928634618517, 'time_step': 0.0037704408168792725, 'init_value': -12.400124549865723, 'ave_value': -17.58945849764723, 'soft_opc': nan} step=16512




2022-04-20 17:44.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.32 [info     ] FQE_20220420174315: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016308731810991153, 'time_algorithm_update': 0.003494499727737072, 'loss': 0.5274322720212039, 'time_step': 0.0037320679010346878, 'init_value': -12.527628898620605, 'ave_value': -17.93740184090461, 'soft_opc': nan} step=16856




2022-04-20 17:44.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.33 [info     ] FQE_20220420174315: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016721528629923976, 'time_algorithm_update': 0.003537320813467336, 'loss': 0.5373550073546899, 'time_step': 0.003779414781304293, 'init_value': -12.81635856628418, 'ave_value': -18.338907758257275, 'soft_opc': nan} step=17200




2022-04-20 17:44.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174315/model_17200.pt
search iteration:  27
using hyper params:  [0.008322175613908852, 0.008613955831356386, 7.458013473622993e-05, 7]
2022-04-20 17:44.33 [debug    ] RoundIterator is selected.
2022-04-20 17:44.33 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420174433
2022-04-20 17:44.33 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:44.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:44.34 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:44.34 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00832217561390

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.37 [info     ] TD3PlusBC_20220420174433: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00039875855919910454, 'time_algorithm_update': 0.0067698056237739425, 'critic_loss': 9.27687189209531, 'actor_loss': 2.659858594860947, 'time_step': 0.007246945336548209, 'td_error': 1.0504742078148999, 'init_value': -11.752270698547363, 'ave_value': -7.488393817812011} step=342
2022-04-20 17:44.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.39 [info     ] TD3PlusBC_20220420174433: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00039229755513152186, 'time_algorithm_update': 0.006782839869895176, 'critic_loss': 6.609625473357084, 'actor_loss': 2.580105702082316, 'time_step': 0.007253184653165047, 'td_error': 1.3211874823001757, 'init_value': -16.08113670349121, 'ave_value': -10.38347720115286} step=684
2022-04-20 17:44.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.42 [info     ] TD3PlusBC_20220420174433: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0004009879820528086, 'time_algorithm_update': 0.006805661826105843, 'critic_loss': 10.181607229667797, 'actor_loss': 2.573161416583591, 'time_step': 0.007285154353805453, 'td_error': 1.6891891469531009, 'init_value': -21.397428512573242, 'ave_value': -13.820841476571} step=1026
2022-04-20 17:44.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.45 [info     ] TD3PlusBC_20220420174433: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00039032955615841156, 'time_algorithm_update': 0.006794720365290056, 'critic_loss': 14.779214553665696, 'actor_loss': 2.5698064572629873, 'time_step': 0.007254023300973992, 'td_error': 2.134916228351259, 'init_value': -26.09561538696289, 'ave_value': -17.00275739436969} step=1368
2022-04-20 17:44.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.48 [info     ] TD3PlusBC_20220420174433: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003980230866817006, 'time_algorithm_update': 0.006804253622802377, 'critic_loss': 19.38992290468941, 'actor_loss': 2.5679643391168607, 'time_step': 0.007272760770474261, 'td_error': 2.5611926525545403, 'init_value': -31.02689552307129, 'ave_value': -20.092648658995508} step=1710
2022-04-20 17:44.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.51 [info     ] TD3PlusBC_20220420174433: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003912204887434753, 'time_algorithm_update': 0.006767670313517253, 'critic_loss': 24.025613450167473, 'actor_loss': 2.567111875578674, 'time_step': 0.007227923437865854, 'td_error': 3.097377268531102, 'init_value': -35.038917541503906, 'ave_value': -23.104612681790936} step=2052
2022-04-20 17:44.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.54 [info     ] TD3PlusBC_20220420174433: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003928231914141025, 'time_algorithm_update': 0.006700700486612599, 'critic_loss': 29.31193887141713, 'actor_loss': 2.565034356033593, 'time_step': 0.007173283058300353, 'td_error': 3.4793765488146136, 'init_value': -39.24908447265625, 'ave_value': -25.842649428017335} step=2394
2022-04-20 17:44.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:44.57 [info     ] TD3PlusBC_20220420174433: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003993120806956152, 'time_algorithm_update': 0.006798250633373595, 'critic_loss': 35.294967629059016, 'actor_loss': 2.564513746060823, 'time_step': 0.007269619501125046, 'td_error': 4.240808260882611, 'init_value': -44.40153884887695, 'ave_value': -29.000853249206635} step=2736
2022-04-20 17:44.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.00 [info     ] TD3PlusBC_20220420174433: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003960327795374463, 'time_algorithm_update': 0.006766628800776967, 'critic_loss': 40.741405682257046, 'actor_loss': 2.564535941296851, 'time_step': 0.007239237166287606, 'td_error': 5.194119229124247, 'init_value': -49.3698844909668, 'ave_value': -31.798453749416833} step=3078
2022-04-20 17:45.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.03 [info     ] TD3PlusBC_20220420174433: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00039365068513747545, 'time_algorithm_update': 0.006800728931761624, 'critic_loss': 45.942939886572766, 'actor_loss': 2.563340071349116, 'time_step': 0.007264810695982816, 'td_error': 5.071496919362134, 'init_value': -50.92945098876953, 'ave_value': -33.69656759742631} step=3420
2022-04-20 17:45.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.06 [info     ] TD3PlusBC_20220420174433: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003993803994697437, 'time_algorithm_update': 0.006728430240474946, 'critic_loss': 52.104089909826804, 'actor_loss': 2.56207643754301, 'time_step': 0.007204660198144745, 'td_error': 6.0165419662192745, 'init_value': -56.795387268066406, 'ave_value': -36.85790414197563} step=3762
2022-04-20 17:45.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.08 [info     ] TD3PlusBC_20220420174433: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003953544717085989, 'time_algorithm_update': 0.006793828735574645, 'critic_loss': 57.90753950152481, 'actor_loss': 2.5620090180670307, 'time_step': 0.007266097598605686, 'td_error': 6.387026899407246, 'init_value': -57.97845458984375, 'ave_value': -38.27276433221526} step=4104
2022-04-20 17:45.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.11 [info     ] TD3PlusBC_20220420174433: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039539141961705615, 'time_algorithm_update': 0.006802640463176526, 'critic_loss': 64.02281460009124, 'actor_loss': 2.562251852269758, 'time_step': 0.007275437751011542, 'td_error': 6.948510420785539, 'init_value': -61.232139587402344, 'ave_value': -40.32351257907101} step=4446
2022-04-20 17:45.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.14 [info     ] TD3PlusBC_20220420174433: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004011671445523089, 'time_algorithm_update': 0.006776210857413666, 'critic_loss': 69.91483840050056, 'actor_loss': 2.562969290025053, 'time_step': 0.0072528953440705235, 'td_error': 7.643442888692026, 'init_value': -63.8599739074707, 'ave_value': -42.46923873202775} step=4788
2022-04-20 17:45.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.17 [info     ] TD3PlusBC_20220420174433: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00039387864676135325, 'time_algorithm_update': 0.006834815120139317, 'critic_loss': 76.03599558378521, 'actor_loss': 2.562096560907643, 'time_step': 0.00730582426863107, 'td_error': 8.278447223547271, 'init_value': -66.73421478271484, 'ave_value': -44.84925015257386} step=5130
2022-04-20 17:45.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.20 [info     ] TD3PlusBC_20220420174433: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003956605119314807, 'time_algorithm_update': 0.006811785419084873, 'critic_loss': 81.30645150747912, 'actor_loss': 2.5619563596290456, 'time_step': 0.007283511217574627, 'td_error': 8.39000421922104, 'init_value': -67.62959289550781, 'ave_value': -45.877719152975104} step=5472
2022-04-20 17:45.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.23 [info     ] TD3PlusBC_20220420174433: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00039690000969066954, 'time_algorithm_update': 0.006777118521127087, 'critic_loss': 86.48918650443093, 'actor_loss': 2.563078110678154, 'time_step': 0.007249989007648669, 'td_error': 9.139861234816763, 'init_value': -71.35011291503906, 'ave_value': -47.90785112894484} step=5814
2022-04-20 17:45.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.26 [info     ] TD3PlusBC_20220420174433: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003970052763732553, 'time_algorithm_update': 0.006781629651610614, 'critic_loss': 91.89544375458657, 'actor_loss': 2.563251311318916, 'time_step': 0.007255377825240643, 'td_error': 9.769264059857617, 'init_value': -73.42521667480469, 'ave_value': -49.61208388609392} step=6156
2022-04-20 17:45.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.29 [info     ] TD3PlusBC_20220420174433: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00039576229296232527, 'time_algorithm_update': 0.006754275651005974, 'critic_loss': 97.26672894215723, 'actor_loss': 2.563880925987199, 'time_step': 0.00723178135721307, 'td_error': 10.395692482722707, 'init_value': -76.063720703125, 'ave_value': -51.30293816963387} step=6498
2022-04-20 17:45.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.32 [info     ] TD3PlusBC_20220420174433: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00039906738794337937, 'time_algorithm_update': 0.006811783327693828, 'critic_loss': 102.75296487863999, 'actor_loss': 2.564337936758298, 'time_step': 0.00727182103876482, 'td_error': 10.432438846042693, 'init_value': -75.98124694824219, 'ave_value': -52.109233000854886} step=6840
2022-04-20 17:45.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.35 [info     ] TD3PlusBC_20220420174433: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00039804469772249633, 'time_algorithm_update': 0.0068571874272753625, 'critic_loss': 107.23091219182601, 'actor_loss': 2.564528505704556, 'time_step': 0.007320111955118458, 'td_error': 11.667507101219522, 'init_value': -77.74324035644531, 'ave_value': -53.853458224456624} step=7182
2022-04-20 17:45.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.37 [info     ] TD3PlusBC_20220420174433: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00039171266276933996, 'time_algorithm_update': 0.006814762862802249, 'critic_loss': 111.767454816584, 'actor_loss': 2.5649171572679665, 'time_step': 0.007265634006924099, 'td_error': 11.744331825766915, 'init_value': -78.32568359375, 'ave_value': -54.79853023167624} step=7524
2022-04-20 17:45.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.40 [info     ] TD3PlusBC_20220420174433: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039485532637925175, 'time_algorithm_update': 0.006837120530200981, 'critic_loss': 115.93823902509367, 'actor_loss': 2.5649560487758345, 'time_step': 0.007296347478676957, 'td_error': 12.197662585708144, 'init_value': -79.8346939086914, 'ave_value': -55.74326349645299} step=7866
2022-04-20 17:45.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.44 [info     ] TD3PlusBC_20220420174433: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003963869217543574, 'time_algorithm_update': 0.008902091031883195, 'critic_loss': 120.55601329691926, 'actor_loss': 2.566107445990133, 'time_step': 0.009363583653990985, 'td_error': 12.765436305570278, 'init_value': -82.93376159667969, 'ave_value': -57.392554545514955} step=8208
2022-04-20 17:45.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.48 [info     ] TD3PlusBC_20220420174433: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0004009503370140031, 'time_algorithm_update': 0.008900008006402624, 'critic_loss': 124.59570151881168, 'actor_loss': 2.5664473737192433, 'time_step': 0.009365173808315344, 'td_error': 13.909404110737682, 'init_value': -81.82893371582031, 'ave_value': -58.10858519001159} step=8550
2022-04-20 17:45.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.51 [info     ] TD3PlusBC_20220420174433: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0004064737007631893, 'time_algorithm_update': 0.008605316368459959, 'critic_loss': 128.73386387518275, 'actor_loss': 2.567686057230185, 'time_step': 0.009081347643980506, 'td_error': 13.91906167206508, 'init_value': -81.61351013183594, 'ave_value': -58.69256755512141} step=8892
2022-04-20 17:45.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.55 [info     ] TD3PlusBC_20220420174433: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0004011239224707174, 'time_algorithm_update': 0.008815991251092208, 'critic_loss': 132.9842234829016, 'actor_loss': 2.5680588658093013, 'time_step': 0.009278430576212922, 'td_error': 15.129179482662058, 'init_value': -83.0506820678711, 'ave_value': -59.33270706137491} step=9234
2022-04-20 17:45.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.59 [info     ] TD3PlusBC_20220420174433: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00040083043059410407, 'time_algorithm_update': 0.008648756651850472, 'critic_loss': 136.5069691395899, 'actor_loss': 2.568824406952886, 'time_step': 0.009109065546626932, 'td_error': 15.454855744764885, 'init_value': -83.65821075439453, 'ave_value': -60.73174256780931} step=9576
2022-04-20 17:45.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.02 [info     ] TD3PlusBC_20220420174433: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0004041257657502827, 'time_algorithm_update': 0.008963342995671501, 'critic_loss': 140.37578658611454, 'actor_loss': 2.5676126954151175, 'time_step': 0.009435224951359263, 'td_error': 15.79800510019259, 'init_value': -87.89673614501953, 'ave_value': -62.021252287005886} step=9918
2022-04-20 17:46.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.06 [info     ] TD3PlusBC_20220420174433: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00040282631478114436, 'time_algorithm_update': 0.008773310142650939, 'critic_loss': 143.80980405751725, 'actor_loss': 2.568084340346487, 'time_step': 0.009241569111918846, 'td_error': 15.973290154320985, 'init_value': -88.29874420166016, 'ave_value': -62.92526195404615} step=10260
2022-04-20 17:46.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.10 [info     ] TD3PlusBC_20220420174433: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000397645242032949, 'time_algorithm_update': 0.008447119367052938, 'critic_loss': 147.2525966265048, 'actor_loss': 2.5685095773105733, 'time_step': 0.008907718965184618, 'td_error': 15.653077278409365, 'init_value': -85.04180908203125, 'ave_value': -62.54862072752453} step=10602
2022-04-20 17:46.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.13 [info     ] TD3PlusBC_20220420174433: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0004034593091373555, 'time_algorithm_update': 0.008933351053829081, 'critic_loss': 149.6778310139974, 'actor_loss': 2.5691255379838553, 'time_step': 0.00939989996235273, 'td_error': 16.543409622688014, 'init_value': -87.1199722290039, 'ave_value': -63.898705783146056} step=10944
2022-04-20 17:46.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.17 [info     ] TD3PlusBC_20220420174433: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00040128426245081495, 'time_algorithm_update': 0.008944343405160291, 'critic_loss': 151.78156827067772, 'actor_loss': 2.568225159282573, 'time_step': 0.009413427776760526, 'td_error': 16.70294288845094, 'init_value': -89.46226501464844, 'ave_value': -64.78964567185237} step=11286
2022-04-20 17:46.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.21 [info     ] TD3PlusBC_20220420174433: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035696280630011305, 'time_algorithm_update': 0.008383497160080581, 'critic_loss': 154.92185681884052, 'actor_loss': 2.5693280097336797, 'time_step': 0.008798179570694415, 'td_error': 16.539428085763607, 'init_value': -87.02699279785156, 'ave_value': -64.25926073548611} step=11628
2022-04-20 17:46.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.24 [info     ] TD3PlusBC_20220420174433: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037048992357755964, 'time_algorithm_update': 0.008625213165729367, 'critic_loss': 156.89017192104407, 'actor_loss': 2.569078851164433, 'time_step': 0.009054386824892279, 'td_error': 18.084597012431484, 'init_value': -87.12803649902344, 'ave_value': -64.97958588600973} step=11970
2022-04-20 17:46.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.28 [info     ] TD3PlusBC_20220420174433: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00039968922821401854, 'time_algorithm_update': 0.00837833839550353, 'critic_loss': 159.1491796482376, 'actor_loss': 2.5686780332821852, 'time_step': 0.00884077144645111, 'td_error': 17.55485310413429, 'init_value': -85.81938934326172, 'ave_value': -65.20713360618883} step=12312
2022-04-20 17:46.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.31 [info     ] TD3PlusBC_20220420174433: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00040353738773635955, 'time_algorithm_update': 0.009027170158966242, 'critic_loss': 161.17279144197877, 'actor_loss': 2.5698513538516754, 'time_step': 0.00949086292445311, 'td_error': 17.090493833550095, 'init_value': -86.1492691040039, 'ave_value': -65.67660847362282} step=12654
2022-04-20 17:46.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.35 [info     ] TD3PlusBC_20220420174433: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00040106954630355387, 'time_algorithm_update': 0.008860956158554345, 'critic_loss': 163.06686718143217, 'actor_loss': 2.5707778359017177, 'time_step': 0.00932169797127707, 'td_error': 16.94921312694217, 'init_value': -87.06340026855469, 'ave_value': -66.37653847705253} step=12996
2022-04-20 17:46.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.39 [info     ] TD3PlusBC_20220420174433: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00040498532746967514, 'time_algorithm_update': 0.008937457151580275, 'critic_loss': 166.06939420644304, 'actor_loss': 2.569528073595281, 'time_step': 0.009409387209262068, 'td_error': 16.66023792778072, 'init_value': -87.14746856689453, 'ave_value': -66.84041019260094} step=13338
2022-04-20 17:46.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.42 [info     ] TD3PlusBC_20220420174433: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0004037214301482976, 'time_algorithm_update': 0.008804937552290353, 'critic_loss': 167.12865269532676, 'actor_loss': 2.569354189766778, 'time_step': 0.009276532987404985, 'td_error': 18.31648947248961, 'init_value': -87.39271545410156, 'ave_value': -67.44291888206283} step=13680
2022-04-20 17:46.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.46 [info     ] TD3PlusBC_20220420174433: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003561513465747499, 'time_algorithm_update': 0.007965766895584196, 'critic_loss': 168.38451604118123, 'actor_loss': 2.569533091539528, 'time_step': 0.00838130607939603, 'td_error': 19.531002305486524, 'init_value': -85.4217300415039, 'ave_value': -66.9047126095752} step=14022
2022-04-20 17:46.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.50 [info     ] TD3PlusBC_20220420174433: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037787323109587733, 'time_algorithm_update': 0.008753365243387502, 'critic_loss': 169.85703264202988, 'actor_loss': 2.5709089438120523, 'time_step': 0.009189467681081672, 'td_error': 18.785832819393857, 'init_value': -88.43870544433594, 'ave_value': -68.30969905971658} step=14364
2022-04-20 17:46.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.53 [info     ] TD3PlusBC_20220420174433: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00040205668287667613, 'time_algorithm_update': 0.00870323111439309, 'critic_loss': 170.97680608292072, 'actor_loss': 2.570388622451247, 'time_step': 0.009170468787700808, 'td_error': 19.719943735526858, 'init_value': -87.32698059082031, 'ave_value': -68.5660770750326} step=14706
2022-04-20 17:46.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.57 [info     ] TD3PlusBC_20220420174433: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00039839047437522843, 'time_algorithm_update': 0.008904615340874208, 'critic_loss': 172.47592975103368, 'actor_loss': 2.5703024696885493, 'time_step': 0.009370113673963044, 'td_error': 19.17494576056711, 'init_value': -87.5160140991211, 'ave_value': -68.52281286192353} step=15048
2022-04-20 17:46.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.01 [info     ] TD3PlusBC_20220420174433: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00040588811127065916, 'time_algorithm_update': 0.008872271978367142, 'critic_loss': 173.47035966839707, 'actor_loss': 2.569067072450069, 'time_step': 0.009340217238978335, 'td_error': 18.476994693897275, 'init_value': -87.0140151977539, 'ave_value': -68.47704766449242} step=15390
2022-04-20 17:47.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.04 [info     ] TD3PlusBC_20220420174433: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0004035297193025288, 'time_algorithm_update': 0.00851805377424809, 'critic_loss': 174.2798833345112, 'actor_loss': 2.569841580084193, 'time_step': 0.008985149232964767, 'td_error': 20.25643000092015, 'init_value': -86.11327362060547, 'ave_value': -68.83076599448114} step=15732
2022-04-20 17:47.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.08 [info     ] TD3PlusBC_20220420174433: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004013030849702177, 'time_algorithm_update': 0.008901753620794641, 'critic_loss': 174.92519853826155, 'actor_loss': 2.5703825560229565, 'time_step': 0.009366213926794932, 'td_error': 17.725905803836433, 'init_value': -83.91248321533203, 'ave_value': -68.26349628690899} step=16074
2022-04-20 17:47.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.11 [info     ] TD3PlusBC_20220420174433: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00040494768243086964, 'time_algorithm_update': 0.008753217451753672, 'critic_loss': 176.12223492449488, 'actor_loss': 2.5707784577419885, 'time_step': 0.009221325840866356, 'td_error': 19.059105132922095, 'init_value': -87.1712875366211, 'ave_value': -69.80255864598838} step=16416
2022-04-20 17:47.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.15 [info     ] TD3PlusBC_20220420174433: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00039897397247671385, 'time_algorithm_update': 0.008816723935088219, 'critic_loss': 176.68967330664918, 'actor_loss': 2.5707404250987094, 'time_step': 0.009281488887050696, 'td_error': 19.70704682712531, 'init_value': -88.76530456542969, 'ave_value': -70.36901146221838} step=16758
2022-04-20 17:47.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.19 [info     ] TD3PlusBC_20220420174433: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00040115947611847814, 'time_algorithm_update': 0.008849411679987321, 'critic_loss': 177.5058637920179, 'actor_loss': 2.5711976073638736, 'time_step': 0.009313938213370697, 'td_error': 19.988526741012198, 'init_value': -85.13197326660156, 'ave_value': -69.63242444478563} step=17100
2022-04-20 17:47.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174433/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:47.20 [info     ] FQE_20220420174719: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016253109437873564, 'time_algorithm_update': 0.004948864500206637, 'loss': 0.008188642282319447, 'time_step': 0.0051871538162231445, 'init_value': -0.3040921688079834, 'ave_value': -0.23536037122075623, 'soft_opc': nan} step=166




2022-04-20 17:47.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.21 [info     ] FQE_20220420174719: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015314228563423617, 'time_algorithm_update': 0.003945352083229157, 'loss': 0.005619439836423171, 'time_step': 0.004169096429663968, 'init_value': -0.4432304799556732, 'ave_value': -0.2995217016310775, 'soft_opc': nan} step=332




2022-04-20 17:47.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.22 [info     ] FQE_20220420174719: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015941154525940678, 'time_algorithm_update': 0.005048980195838285, 'loss': 0.004825320570303284, 'time_step': 0.0052806799670299855, 'init_value': -0.5386669039726257, 'ave_value': -0.3636128720475008, 'soft_opc': nan} step=498




2022-04-20 17:47.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.23 [info     ] FQE_20220420174719: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016511922859283816, 'time_algorithm_update': 0.005031242428055729, 'loss': 0.004644696912111108, 'time_step': 0.005269472857555711, 'init_value': -0.5912156701087952, 'ave_value': -0.36531594149340446, 'soft_opc': nan} step=664




2022-04-20 17:47.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.24 [info     ] FQE_20220420174719: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001598883824176099, 'time_algorithm_update': 0.005039255303072642, 'loss': 0.004312810343578009, 'time_step': 0.005272468888615987, 'init_value': -0.6657912731170654, 'ave_value': -0.3932814336138534, 'soft_opc': nan} step=830




2022-04-20 17:47.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.25 [info     ] FQE_20220420174719: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001591487103197948, 'time_algorithm_update': 0.005081314638436559, 'loss': 0.003944902260035995, 'time_step': 0.005314480827515383, 'init_value': -0.7147655487060547, 'ave_value': -0.42001554117588374, 'soft_opc': nan} step=996




2022-04-20 17:47.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.26 [info     ] FQE_20220420174719: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015945463295442513, 'time_algorithm_update': 0.0049733009683080465, 'loss': 0.003901469458107758, 'time_step': 0.005206777388791004, 'init_value': -0.7794550657272339, 'ave_value': -0.44245786711128915, 'soft_opc': nan} step=1162




2022-04-20 17:47.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.27 [info     ] FQE_20220420174719: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001644829669630671, 'time_algorithm_update': 0.005072613796555853, 'loss': 0.0037536187250313, 'time_step': 0.005309557340231286, 'init_value': -0.857709527015686, 'ave_value': -0.48834548531108546, 'soft_opc': nan} step=1328




2022-04-20 17:47.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.28 [info     ] FQE_20220420174719: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001633052366325654, 'time_algorithm_update': 0.005047528140516166, 'loss': 0.0035292739988338337, 'time_step': 0.005285757133759648, 'init_value': -0.9034320712089539, 'ave_value': -0.5147860462827658, 'soft_opc': nan} step=1494




2022-04-20 17:47.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.28 [info     ] FQE_20220420174719: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001608133316040039, 'time_algorithm_update': 0.005029495940150985, 'loss': 0.003785270565688745, 'time_step': 0.005260628390024944, 'init_value': -0.9756004214286804, 'ave_value': -0.5466544748023824, 'soft_opc': nan} step=1660




2022-04-20 17:47.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.29 [info     ] FQE_20220420174719: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015922195940132602, 'time_algorithm_update': 0.004410742277122405, 'loss': 0.0036646974390843906, 'time_step': 0.004640461450599763, 'init_value': -1.0736709833145142, 'ave_value': -0.6185075985607565, 'soft_opc': nan} step=1826




2022-04-20 17:47.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.30 [info     ] FQE_20220420174719: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015892465430569937, 'time_algorithm_update': 0.0048391933900764185, 'loss': 0.0036871719502995395, 'time_step': 0.005075055432606892, 'init_value': -1.0878387689590454, 'ave_value': -0.6043518698845596, 'soft_opc': nan} step=1992




2022-04-20 17:47.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.31 [info     ] FQE_20220420174719: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001579207110117717, 'time_algorithm_update': 0.005111424319715385, 'loss': 0.004384325536007218, 'time_step': 0.005343988717320454, 'init_value': -1.228900671005249, 'ave_value': -0.7196835000000827, 'soft_opc': nan} step=2158




2022-04-20 17:47.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.32 [info     ] FQE_20220420174719: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001616018364228398, 'time_algorithm_update': 0.005096722798175122, 'loss': 0.0041116859605367555, 'time_step': 0.00533172595931823, 'init_value': -1.3302571773529053, 'ave_value': -0.7981730708365773, 'soft_opc': nan} step=2324




2022-04-20 17:47.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.33 [info     ] FQE_20220420174719: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001611149454691324, 'time_algorithm_update': 0.005102029765944883, 'loss': 0.004600154253491481, 'time_step': 0.005337004201957978, 'init_value': -1.3807554244995117, 'ave_value': -0.8309150373680635, 'soft_opc': nan} step=2490




2022-04-20 17:47.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.34 [info     ] FQE_20220420174719: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016270057264580783, 'time_algorithm_update': 0.005127270537686635, 'loss': 0.005296258197631687, 'time_step': 0.005364204027566565, 'init_value': -1.493574619293213, 'ave_value': -0.919132137127422, 'soft_opc': nan} step=2656




2022-04-20 17:47.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.35 [info     ] FQE_20220420174719: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001667177820780191, 'time_algorithm_update': 0.005096385277897479, 'loss': 0.005506415234974036, 'time_step': 0.005335649812077901, 'init_value': -1.6044578552246094, 'ave_value': -1.0108961984884364, 'soft_opc': nan} step=2822




2022-04-20 17:47.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.36 [info     ] FQE_20220420174719: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016407363386039273, 'time_algorithm_update': 0.005050129201038775, 'loss': 0.005776917929719034, 'time_step': 0.0052858749067926985, 'init_value': -1.653719186782837, 'ave_value': -1.041938118290928, 'soft_opc': nan} step=2988




2022-04-20 17:47.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.37 [info     ] FQE_20220420174719: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016266897500279438, 'time_algorithm_update': 0.005058518375258848, 'loss': 0.006471267921698605, 'time_step': 0.005295071257166116, 'init_value': -1.7410274744033813, 'ave_value': -1.086274070685377, 'soft_opc': nan} step=3154




2022-04-20 17:47.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.38 [info     ] FQE_20220420174719: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016040687101433077, 'time_algorithm_update': 0.00448195187442274, 'loss': 0.006927437345000792, 'time_step': 0.004715706928666815, 'init_value': -1.8765335083007812, 'ave_value': -1.2091704852879048, 'soft_opc': nan} step=3320




2022-04-20 17:47.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.39 [info     ] FQE_20220420174719: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016387830297630955, 'time_algorithm_update': 0.004872846316142255, 'loss': 0.007234490191822041, 'time_step': 0.005108811769140772, 'init_value': -1.9082709550857544, 'ave_value': -1.221846397838614, 'soft_opc': nan} step=3486




2022-04-20 17:47.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.40 [info     ] FQE_20220420174719: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001612485173236893, 'time_algorithm_update': 0.005017060831368688, 'loss': 0.007494676856784128, 'time_step': 0.0052520410124077856, 'init_value': -2.021155834197998, 'ave_value': -1.2951474297086936, 'soft_opc': nan} step=3652




2022-04-20 17:47.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.41 [info     ] FQE_20220420174719: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016351349382515414, 'time_algorithm_update': 0.005059144583093114, 'loss': 0.008408510657756713, 'time_step': 0.005296165684619582, 'init_value': -2.116304397583008, 'ave_value': -1.3717838862390668, 'soft_opc': nan} step=3818




2022-04-20 17:47.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.42 [info     ] FQE_20220420174719: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001613835254347468, 'time_algorithm_update': 0.005118341331022331, 'loss': 0.008623013770101166, 'time_step': 0.005352484174521573, 'init_value': -2.119320869445801, 'ave_value': -1.3512982957367157, 'soft_opc': nan} step=3984




2022-04-20 17:47.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.43 [info     ] FQE_20220420174719: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016363988439720798, 'time_algorithm_update': 0.005020809460835284, 'loss': 0.009550127418885702, 'time_step': 0.00526012713650623, 'init_value': -2.2590091228485107, 'ave_value': -1.4803294374733358, 'soft_opc': nan} step=4150




2022-04-20 17:47.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.44 [info     ] FQE_20220420174719: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015825105000691242, 'time_algorithm_update': 0.005032805075128394, 'loss': 0.010460676132359105, 'time_step': 0.005263127476336008, 'init_value': -2.371138572692871, 'ave_value': -1.5511720635735238, 'soft_opc': nan} step=4316




2022-04-20 17:47.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.45 [info     ] FQE_20220420174719: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015936271253838596, 'time_algorithm_update': 0.00499375613338976, 'loss': 0.011193665863065251, 'time_step': 0.005226471337927393, 'init_value': -2.442791700363159, 'ave_value': -1.6208035614912037, 'soft_opc': nan} step=4482




2022-04-20 17:47.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.46 [info     ] FQE_20220420174719: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00020579257643366433, 'time_algorithm_update': 0.00788555518690362, 'loss': 0.011747380896718297, 'time_step': 0.008168825184006289, 'init_value': -2.52895188331604, 'ave_value': -1.6559991579267892, 'soft_opc': nan} step=4648




2022-04-20 17:47.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.47 [info     ] FQE_20220420174719: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.000158297010214932, 'time_algorithm_update': 0.004028653524008142, 'loss': 0.012667983095722654, 'time_step': 0.004258878259773714, 'init_value': -2.635633945465088, 'ave_value': -1.7405327665644723, 'soft_opc': nan} step=4814




2022-04-20 17:47.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.48 [info     ] FQE_20220420174719: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016373611358274897, 'time_algorithm_update': 0.005125202328325754, 'loss': 0.01369367194634088, 'time_step': 0.005363057894879077, 'init_value': -2.7404932975769043, 'ave_value': -1.8297287036371124, 'soft_opc': nan} step=4980




2022-04-20 17:47.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.49 [info     ] FQE_20220420174719: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016441115413803653, 'time_algorithm_update': 0.004994561873286603, 'loss': 0.014903550525756934, 'time_step': 0.0052358644554413945, 'init_value': -2.842477321624756, 'ave_value': -1.93048896303585, 'soft_opc': nan} step=5146




2022-04-20 17:47.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.50 [info     ] FQE_20220420174719: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001629634075854198, 'time_algorithm_update': 0.005102385957557035, 'loss': 0.015665926401088215, 'time_step': 0.005336840468716909, 'init_value': -2.9205148220062256, 'ave_value': -1.9612796811974262, 'soft_opc': nan} step=5312




2022-04-20 17:47.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.51 [info     ] FQE_20220420174719: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.000158271157597921, 'time_algorithm_update': 0.005103305161717427, 'loss': 0.0157202050266563, 'time_step': 0.005333502608609487, 'init_value': -2.961210012435913, 'ave_value': -1.9658479501266737, 'soft_opc': nan} step=5478




2022-04-20 17:47.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.52 [info     ] FQE_20220420174719: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016586177320365445, 'time_algorithm_update': 0.005104172660643796, 'loss': 0.016757408222912933, 'time_step': 0.005340612078287515, 'init_value': -3.0748398303985596, 'ave_value': -2.0443639456823064, 'soft_opc': nan} step=5644




2022-04-20 17:47.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.53 [info     ] FQE_20220420174719: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016680970249405825, 'time_algorithm_update': 0.005166464541331831, 'loss': 0.01817094823289438, 'time_step': 0.005406918295894761, 'init_value': -3.0989978313446045, 'ave_value': -2.056538814178727, 'soft_opc': nan} step=5810




2022-04-20 17:47.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.54 [info     ] FQE_20220420174719: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016239895878067935, 'time_algorithm_update': 0.0050805160798222185, 'loss': 0.017891804177134513, 'time_step': 0.005316016185714538, 'init_value': -3.154996156692505, 'ave_value': -2.0851888155816374, 'soft_opc': nan} step=5976




2022-04-20 17:47.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.55 [info     ] FQE_20220420174719: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016859927809382058, 'time_algorithm_update': 0.005109198122139436, 'loss': 0.018816157871806793, 'time_step': 0.0053534306675554755, 'init_value': -3.2919111251831055, 'ave_value': -2.1959757390792842, 'soft_opc': nan} step=6142




2022-04-20 17:47.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.55 [info     ] FQE_20220420174719: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001598754561091044, 'time_algorithm_update': 0.004145972700004118, 'loss': 0.019114396559405535, 'time_step': 0.004380744623850627, 'init_value': -3.371264696121216, 'ave_value': -2.2480468104215894, 'soft_opc': nan} step=6308




2022-04-20 17:47.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.56 [info     ] FQE_20220420174719: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001611436705991446, 'time_algorithm_update': 0.005033314946186112, 'loss': 0.020866791376176412, 'time_step': 0.005267948989408562, 'init_value': -3.4669370651245117, 'ave_value': -2.310663503294324, 'soft_opc': nan} step=6474




2022-04-20 17:47.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.57 [info     ] FQE_20220420174719: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001600348805806723, 'time_algorithm_update': 0.0051852579576423365, 'loss': 0.02156678412505145, 'time_step': 0.005421828074627612, 'init_value': -3.658787727355957, 'ave_value': -2.4402920672888153, 'soft_opc': nan} step=6640




2022-04-20 17:47.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.58 [info     ] FQE_20220420174719: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015903524605624647, 'time_algorithm_update': 0.005039327115897673, 'loss': 0.023118549710313553, 'time_step': 0.005270071776516466, 'init_value': -3.6899425983428955, 'ave_value': -2.458237367814726, 'soft_opc': nan} step=6806




2022-04-20 17:47.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:47.59 [info     ] FQE_20220420174719: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015963703753000283, 'time_algorithm_update': 0.005002461284039968, 'loss': 0.02131757909482547, 'time_step': 0.005234800189374441, 'init_value': -3.7871994972229004, 'ave_value': -2.491953093216226, 'soft_opc': nan} step=6972




2022-04-20 17:47.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.00 [info     ] FQE_20220420174719: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016157885631883, 'time_algorithm_update': 0.005173940256417516, 'loss': 0.0250360486055556, 'time_step': 0.005411264408065612, 'init_value': -3.8672709465026855, 'ave_value': -2.539122611136587, 'soft_opc': nan} step=7138




2022-04-20 17:48.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.01 [info     ] FQE_20220420174719: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016203414962952397, 'time_algorithm_update': 0.004918626992099257, 'loss': 0.02568842835250277, 'time_step': 0.0051562728652034895, 'init_value': -3.9748611450195312, 'ave_value': -2.5827652428265626, 'soft_opc': nan} step=7304




2022-04-20 17:48.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.02 [info     ] FQE_20220420174719: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016284850706537086, 'time_algorithm_update': 0.005069790116275649, 'loss': 0.027341073273725987, 'time_step': 0.005309698093368347, 'init_value': -4.004889488220215, 'ave_value': -2.600424041874237, 'soft_opc': nan} step=7470




2022-04-20 17:48.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.03 [info     ] FQE_20220420174719: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016109914664762565, 'time_algorithm_update': 0.004966893828058818, 'loss': 0.027672353093315994, 'time_step': 0.005202758743102292, 'init_value': -4.000514030456543, 'ave_value': -2.563821138569095, 'soft_opc': nan} step=7636




2022-04-20 17:48.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.04 [info     ] FQE_20220420174719: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015881693506815346, 'time_algorithm_update': 0.004206090088350227, 'loss': 0.029978354264426618, 'time_step': 0.004436373710632324, 'init_value': -4.318121910095215, 'ave_value': -2.8438255289787646, 'soft_opc': nan} step=7802




2022-04-20 17:48.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.05 [info     ] FQE_20220420174719: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016056629548589867, 'time_algorithm_update': 0.005093015820147043, 'loss': 0.02955055462984727, 'time_step': 0.005326694752796587, 'init_value': -4.253364562988281, 'ave_value': -2.747003053773094, 'soft_opc': nan} step=7968




2022-04-20 17:48.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.06 [info     ] FQE_20220420174719: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015886145901967245, 'time_algorithm_update': 0.005105066012187177, 'loss': 0.030029374522617065, 'time_step': 0.0053378444120108365, 'init_value': -4.32448673248291, 'ave_value': -2.828329927145361, 'soft_opc': nan} step=8134




2022-04-20 17:48.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:48.07 [info     ] FQE_20220420174719: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016523843788238894, 'time_algorithm_update': 0.005129888833287251, 'loss': 0.029609208023287804, 'time_step': 0.005369229489062206, 'init_value': -4.238783836364746, 'ave_value': -2.740170162674543, 'soft_opc': nan} step=8300




2022-04-20 17:48.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174719/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:48.07 [info     ] Directory is created at d3rlpy_logs/FQE_20220420174807
2022-04-20 17:48.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:48.07 [debug    ] Building models...
2022-04-20 17:48.07 [debug    ] Models have been built.
2022-04-20 17:48.07 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420174807/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:48.09 [info     ] FQE_20220420174807: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016890109424859705, 'time_algorithm_update': 0.005069710503161793, 'loss': 0.028470528792117682, 'time_step': 0.00531512179844816, 'init_value': -1.2275816202163696, 'ave_value': -1.1982713282798707, 'soft_opc': nan} step=355




2022-04-20 17:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.11 [info     ] FQE_20220420174807: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00017809061936929194, 'time_algorithm_update': 0.005063614375154737, 'loss': 0.025675392644086355, 'time_step': 0.005319835770298058, 'init_value': -2.3010830879211426, 'ave_value': -2.2774475785756203, 'soft_opc': nan} step=710




2022-04-20 17:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.13 [info     ] FQE_20220420174807: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00017133565016195807, 'time_algorithm_update': 0.004578103481883734, 'loss': 0.027729092551474, 'time_step': 0.004823415380128673, 'init_value': -3.03753662109375, 'ave_value': -3.028563471159573, 'soft_opc': nan} step=1065




2022-04-20 17:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.15 [info     ] FQE_20220420174807: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00017263654252173196, 'time_algorithm_update': 0.0050746259555010725, 'loss': 0.031335972784690455, 'time_step': 0.005323114529461928, 'init_value': -3.9688796997070312, 'ave_value': -3.985001193709969, 'soft_opc': nan} step=1420




2022-04-20 17:48.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.17 [info     ] FQE_20220420174807: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001732618036404462, 'time_algorithm_update': 0.005035578365057287, 'loss': 0.03780147446660508, 'time_step': 0.0052844887048425805, 'init_value': -4.687847137451172, 'ave_value': -4.728459413447435, 'soft_opc': nan} step=1775




2022-04-20 17:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.19 [info     ] FQE_20220420174807: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017378430970957582, 'time_algorithm_update': 0.005022779652770136, 'loss': 0.048057005648881615, 'time_step': 0.005275048672313421, 'init_value': -5.723122596740723, 'ave_value': -5.813428366567791, 'soft_opc': nan} step=2130




2022-04-20 17:48.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.21 [info     ] FQE_20220420174807: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.0001710374590376733, 'time_algorithm_update': 0.004833827220218283, 'loss': 0.059678129765244434, 'time_step': 0.00508197394894882, 'init_value': -6.36891508102417, 'ave_value': -6.480276131583917, 'soft_opc': nan} step=2485




2022-04-20 17:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.23 [info     ] FQE_20220420174807: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00017258684400101783, 'time_algorithm_update': 0.004997278267229107, 'loss': 0.0767412084299074, 'time_step': 0.005247018035029022, 'init_value': -7.354487419128418, 'ave_value': -7.52063389697437, 'soft_opc': nan} step=2840




2022-04-20 17:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.25 [info     ] FQE_20220420174807: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001768105466600875, 'time_algorithm_update': 0.005058870181231432, 'loss': 0.09065720499821112, 'time_step': 0.005309775177861603, 'init_value': -8.102072715759277, 'ave_value': -8.257896469243859, 'soft_opc': nan} step=3195




2022-04-20 17:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.27 [info     ] FQE_20220420174807: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.0001720092665981239, 'time_algorithm_update': 0.004964478586761045, 'loss': 0.11133370658671352, 'time_step': 0.0052120772885604644, 'init_value': -9.080005645751953, 'ave_value': -9.230676736886897, 'soft_opc': nan} step=3550




2022-04-20 17:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.29 [info     ] FQE_20220420174807: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00017348544698365977, 'time_algorithm_update': 0.005054089049218406, 'loss': 0.13260535878772048, 'time_step': 0.005300987270516409, 'init_value': -10.113062858581543, 'ave_value': -10.25242281995685, 'soft_opc': nan} step=3905




2022-04-20 17:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.31 [info     ] FQE_20220420174807: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017183800818214954, 'time_algorithm_update': 0.004591576482208682, 'loss': 0.15827007118455122, 'time_step': 0.004836627799020687, 'init_value': -10.739800453186035, 'ave_value': -10.92606501002416, 'soft_opc': nan} step=4260




2022-04-20 17:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.33 [info     ] FQE_20220420174807: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017461776733398437, 'time_algorithm_update': 0.005016893064472038, 'loss': 0.1771841704635553, 'time_step': 0.00526584504355847, 'init_value': -11.559943199157715, 'ave_value': -11.727742564003133, 'soft_opc': nan} step=4615




2022-04-20 17:48.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.35 [info     ] FQE_20220420174807: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00017554659239003357, 'time_algorithm_update': 0.005041749040845415, 'loss': 0.19946767593353568, 'time_step': 0.005293067744080449, 'init_value': -12.094084739685059, 'ave_value': -12.270362456210025, 'soft_opc': nan} step=4970




2022-04-20 17:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.37 [info     ] FQE_20220420174807: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.000175377348778953, 'time_algorithm_update': 0.005114794449067451, 'loss': 0.2227991257230161, 'time_step': 0.005365796827934158, 'init_value': -12.777493476867676, 'ave_value': -13.03363741143023, 'soft_opc': nan} step=5325




2022-04-20 17:48.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.39 [info     ] FQE_20220420174807: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017176816161249724, 'time_algorithm_update': 0.0045727971573950536, 'loss': 0.24493958809950822, 'time_step': 0.004821223944005832, 'init_value': -13.140364646911621, 'ave_value': -13.432068849933193, 'soft_opc': nan} step=5680




2022-04-20 17:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.41 [info     ] FQE_20220420174807: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00017365401899310903, 'time_algorithm_update': 0.005068373344313931, 'loss': 0.2740021388295671, 'time_step': 0.005316319935758349, 'init_value': -13.75040340423584, 'ave_value': -14.197672459613555, 'soft_opc': nan} step=6035




2022-04-20 17:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.43 [info     ] FQE_20220420174807: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00017565606345593089, 'time_algorithm_update': 0.005019225536937445, 'loss': 0.29712483810497003, 'time_step': 0.005270360892927143, 'init_value': -13.912335395812988, 'ave_value': -14.52734728221766, 'soft_opc': nan} step=6390




2022-04-20 17:48.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.45 [info     ] FQE_20220420174807: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00017526183329837422, 'time_algorithm_update': 0.005024351200587313, 'loss': 0.3224357706297871, 'time_step': 0.005273030509411449, 'init_value': -14.228463172912598, 'ave_value': -15.060402028674458, 'soft_opc': nan} step=6745




2022-04-20 17:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.47 [info     ] FQE_20220420174807: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.000175979775442204, 'time_algorithm_update': 0.005012759356431558, 'loss': 0.34308214597928693, 'time_step': 0.00526409216330085, 'init_value': -14.416723251342773, 'ave_value': -15.452481092883698, 'soft_opc': nan} step=7100




2022-04-20 17:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.49 [info     ] FQE_20220420174807: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001728467538323201, 'time_algorithm_update': 0.00483531481783155, 'loss': 0.35653056051109877, 'time_step': 0.005082306391756299, 'init_value': -14.551909446716309, 'ave_value': -15.946105619836382, 'soft_opc': nan} step=7455




2022-04-20 17:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.51 [info     ] FQE_20220420174807: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00017508587367098097, 'time_algorithm_update': 0.005096449650509257, 'loss': 0.3812570951301867, 'time_step': 0.005349509145172549, 'init_value': -14.932982444763184, 'ave_value': -16.765300128181757, 'soft_opc': nan} step=7810




2022-04-20 17:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.53 [info     ] FQE_20220420174807: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00017328463809590945, 'time_algorithm_update': 0.005042292366565114, 'loss': 0.4021338674961261, 'time_step': 0.005289561311963578, 'init_value': -15.116422653198242, 'ave_value': -17.23262857018156, 'soft_opc': nan} step=8165




2022-04-20 17:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.55 [info     ] FQE_20220420174807: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017402474309357119, 'time_algorithm_update': 0.005099839895543918, 'loss': 0.4123206235415919, 'time_step': 0.005347026905543367, 'init_value': -15.141667366027832, 'ave_value': -17.518796278210775, 'soft_opc': nan} step=8520




2022-04-20 17:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.57 [info     ] FQE_20220420174807: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017355596515494334, 'time_algorithm_update': 0.004622540675418478, 'loss': 0.43141398593048813, 'time_step': 0.004873930568426428, 'init_value': -15.318716049194336, 'ave_value': -18.052320129423375, 'soft_opc': nan} step=8875




2022-04-20 17:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:48.59 [info     ] FQE_20220420174807: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017393743488150582, 'time_algorithm_update': 0.005108398786732848, 'loss': 0.4423629978488029, 'time_step': 0.005357126450874436, 'init_value': -15.622092247009277, 'ave_value': -18.804476744105305, 'soft_opc': nan} step=9230




2022-04-20 17:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.01 [info     ] FQE_20220420174807: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017518661391567177, 'time_algorithm_update': 0.005038043814645687, 'loss': 0.4604989740787677, 'time_step': 0.005288937394048126, 'init_value': -15.214587211608887, 'ave_value': -18.69221081800778, 'soft_opc': nan} step=9585




2022-04-20 17:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.03 [info     ] FQE_20220420174807: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017411205130563655, 'time_algorithm_update': 0.00505173642870406, 'loss': 0.47914357076228503, 'time_step': 0.005301897290726783, 'init_value': -15.834676742553711, 'ave_value': -19.51958467663485, 'soft_opc': nan} step=9940




2022-04-20 17:49.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.05 [info     ] FQE_20220420174807: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017318188304632482, 'time_algorithm_update': 0.00459377732075436, 'loss': 0.5027498921982839, 'time_step': 0.00484358827832719, 'init_value': -15.691319465637207, 'ave_value': -19.664835940591004, 'soft_opc': nan} step=10295




2022-04-20 17:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.07 [info     ] FQE_20220420174807: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017612014018313985, 'time_algorithm_update': 0.00512068036576392, 'loss': 0.5231753213829557, 'time_step': 0.0053726605966057574, 'init_value': -16.197359085083008, 'ave_value': -20.403703575965828, 'soft_opc': nan} step=10650




2022-04-20 17:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.09 [info     ] FQE_20220420174807: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017398646180058868, 'time_algorithm_update': 0.005030057799648231, 'loss': 0.5429823110221137, 'time_step': 0.0052810554773035185, 'init_value': -16.257596969604492, 'ave_value': -20.597547618237396, 'soft_opc': nan} step=11005




2022-04-20 17:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.11 [info     ] FQE_20220420174807: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017741095851844465, 'time_algorithm_update': 0.005073474158703442, 'loss': 0.5573306525572085, 'time_step': 0.005327676047741528, 'init_value': -16.5231876373291, 'ave_value': -21.081693713491642, 'soft_opc': nan} step=11360




2022-04-20 17:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.13 [info     ] FQE_20220420174807: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017482529223804744, 'time_algorithm_update': 0.0050596270762698755, 'loss': 0.564690759347778, 'time_step': 0.005310806086365606, 'init_value': -17.05234718322754, 'ave_value': -21.75594018736478, 'soft_opc': nan} step=11715




2022-04-20 17:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.15 [info     ] FQE_20220420174807: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017232424776319048, 'time_algorithm_update': 0.004782448352222711, 'loss': 0.5830445221192401, 'time_step': 0.00502976699614189, 'init_value': -16.91790199279785, 'ave_value': -21.74900712612369, 'soft_opc': nan} step=12070




2022-04-20 17:49.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.17 [info     ] FQE_20220420174807: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017307778479347765, 'time_algorithm_update': 0.005020427032255791, 'loss': 0.5775195876582407, 'time_step': 0.0052690136600548115, 'init_value': -17.138660430908203, 'ave_value': -22.0029247791112, 'soft_opc': nan} step=12425




2022-04-20 17:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.19 [info     ] FQE_20220420174807: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001742067471356459, 'time_algorithm_update': 0.005061821198799241, 'loss': 0.586115426433758, 'time_step': 0.00531153678894043, 'init_value': -17.06325912475586, 'ave_value': -21.98332817603665, 'soft_opc': nan} step=12780




2022-04-20 17:49.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.21 [info     ] FQE_20220420174807: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00017801875799474583, 'time_algorithm_update': 0.005064232920257139, 'loss': 0.596695163793547, 'time_step': 0.005319292444578359, 'init_value': -17.455705642700195, 'ave_value': -22.463331641314344, 'soft_opc': nan} step=13135




2022-04-20 17:49.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.23 [info     ] FQE_20220420174807: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00017242767441440635, 'time_algorithm_update': 0.004531955718994141, 'loss': 0.6171639652004545, 'time_step': 0.004780328777474417, 'init_value': -17.58490753173828, 'ave_value': -22.746677078051132, 'soft_opc': nan} step=13490




2022-04-20 17:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.25 [info     ] FQE_20220420174807: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.0001738568426857532, 'time_algorithm_update': 0.00497367885750784, 'loss': 0.628653589845963, 'time_step': 0.005222401148836378, 'init_value': -17.891149520874023, 'ave_value': -23.112094122014145, 'soft_opc': nan} step=13845




2022-04-20 17:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.27 [info     ] FQE_20220420174807: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00017400123703647667, 'time_algorithm_update': 0.005058757352157378, 'loss': 0.6423831297478205, 'time_step': 0.0053085273420307, 'init_value': -17.81763458251953, 'ave_value': -23.218257364578918, 'soft_opc': nan} step=14200




2022-04-20 17:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.29 [info     ] FQE_20220420174807: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017557144165039062, 'time_algorithm_update': 0.005147575324689838, 'loss': 0.6470195930398686, 'time_step': 0.005402800734613983, 'init_value': -18.613178253173828, 'ave_value': -23.953287383377493, 'soft_opc': nan} step=14555




2022-04-20 17:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.31 [info     ] FQE_20220420174807: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00017091388433751925, 'time_algorithm_update': 0.004596724308712382, 'loss': 0.6595358013739469, 'time_step': 0.004842257163894009, 'init_value': -18.76744270324707, 'ave_value': -24.059047239389812, 'soft_opc': nan} step=14910




2022-04-20 17:49.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.33 [info     ] FQE_20220420174807: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00017353648870763644, 'time_algorithm_update': 0.005044949222618425, 'loss': 0.6762127418858065, 'time_step': 0.0052952000792597384, 'init_value': -18.59115982055664, 'ave_value': -24.09994082247582, 'soft_opc': nan} step=15265




2022-04-20 17:49.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.35 [info     ] FQE_20220420174807: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017327254926654656, 'time_algorithm_update': 0.005066148999711158, 'loss': 0.6844354390573334, 'time_step': 0.0053165811887929135, 'init_value': -19.190277099609375, 'ave_value': -24.59725313213878, 'soft_opc': nan} step=15620




2022-04-20 17:49.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.37 [info     ] FQE_20220420174807: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.0001754948790644256, 'time_algorithm_update': 0.0049866985267316795, 'loss': 0.6991479440054423, 'time_step': 0.005237346299937074, 'init_value': -19.512426376342773, 'ave_value': -24.993548275723555, 'soft_opc': nan} step=15975




2022-04-20 17:49.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.39 [info     ] FQE_20220420174807: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017375005802638095, 'time_algorithm_update': 0.004999300459740867, 'loss': 0.6850948890330086, 'time_step': 0.005249439830511389, 'init_value': -19.47728157043457, 'ave_value': -24.907942754168786, 'soft_opc': nan} step=16330




2022-04-20 17:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.40 [info     ] FQE_20220420174807: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016897832843619333, 'time_algorithm_update': 0.004581761695969273, 'loss': 0.688945630691211, 'time_step': 0.004824873427270164, 'init_value': -19.557659149169922, 'ave_value': -25.096620715853483, 'soft_opc': nan} step=16685




2022-04-20 17:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.42 [info     ] FQE_20220420174807: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.000173118080891354, 'time_algorithm_update': 0.005017674137169206, 'loss': 0.7030804774710829, 'time_step': 0.0052670854917714295, 'init_value': -20.229795455932617, 'ave_value': -25.805628171936636, 'soft_opc': nan} step=17040




2022-04-20 17:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.45 [info     ] FQE_20220420174807: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00017110864881058813, 'time_algorithm_update': 0.005018433718614175, 'loss': 0.703352503530996, 'time_step': 0.005263711365175919, 'init_value': -20.292055130004883, 'ave_value': -25.828785724480525, 'soft_opc': nan} step=17395




2022-04-20 17:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:49.47 [info     ] FQE_20220420174807: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017262042408258143, 'time_algorithm_update': 0.005037694581797425, 'loss': 0.7253301378864218, 'time_step': 0.005285735869071853, 'init_value': -20.572589874267578, 'ave_value': -26.052967250528848, 'soft_opc': nan} step=17750




2022-04-20 17:49.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174807/model_17750.pt
search iteration:  28
using hyper params:  [0.00787238694248224, 0.0005071026087749433, 7.008722751394709e-05, 1]
2022-04-20 17:49.47 [debug    ] RoundIterator is selected.
2022-04-20 17:49.47 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420174947
2022-04-20 17:49.47 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:49.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:49.47 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:49.47 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00787238694248

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.50 [info     ] TD3PlusBC_20220420174947: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003181298573811849, 'time_algorithm_update': 0.008372508294401114, 'critic_loss': 1.4067270716989946, 'actor_loss': 0.03944174910497944, 'time_step': 0.008768347968832094, 'td_error': 0.8019753396251077, 'init_value': -0.4865192472934723, 'ave_value': 0.15088082586134038} step=342
2022-04-20 17:49.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.54 [info     ] TD3PlusBC_20220420174947: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003197695079602693, 'time_algorithm_update': 0.008691906929016113, 'critic_loss': 0.21537147266300102, 'actor_loss': -0.008732845756219841, 'time_step': 0.009091702818173415, 'td_error': 0.8038131701827566, 'init_value': -0.7142215371131897, 'ave_value': 0.20523506495094782} step=684
2022-04-20 17:49.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.57 [info     ] TD3PlusBC_20220420174947: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00032075037036025735, 'time_algorithm_update': 0.00842862031613177, 'critic_loss': 0.22381668740458655, 'actor_loss': 0.008294011700397346, 'time_step': 0.008830192493416412, 'td_error': 0.8022093674488896, 'init_value': -0.9778867959976196, 'ave_value': 0.2633220462750241} step=1026
2022-04-20 17:49.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.01 [info     ] TD3PlusBC_20220420174947: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00031665682095533226, 'time_algorithm_update': 0.008765729547244066, 'critic_loss': 0.25048368027684287, 'actor_loss': 0.016971915655317363, 'time_step': 0.009162341642100908, 'td_error': 0.8024576764784568, 'init_value': -1.258694052696228, 'ave_value': 0.321592169277714} step=1368
2022-04-20 17:50.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.05 [info     ] TD3PlusBC_20220420174947: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00032103410241199516, 'time_algorithm_update': 0.008739703579952842, 'critic_loss': 0.28752684669449313, 'actor_loss': 0.03273096907208538, 'time_step': 0.009142334698236476, 'td_error': 0.8036827529914069, 'init_value': -1.5435377359390259, 'ave_value': 0.4176829481093834} step=1710
2022-04-20 17:50.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.08 [info     ] TD3PlusBC_20220420174947: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003177004250866628, 'time_algorithm_update': 0.008441554175482856, 'critic_loss': 0.3368751979841475, 'actor_loss': 0.027671444071838032, 'time_step': 0.008838816692954615, 'td_error': 0.8094234347765166, 'init_value': -1.855858564376831, 'ave_value': 0.42927388245401366} step=2052
2022-04-20 17:50.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.12 [info     ] TD3PlusBC_20220420174947: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032247437371148006, 'time_algorithm_update': 0.008714618041501409, 'critic_loss': 0.39034926270445186, 'actor_loss': 0.04082776187804707, 'time_step': 0.009118267667223836, 'td_error': 0.8207038240249133, 'init_value': -2.1492462158203125, 'ave_value': 0.5248695494354333} step=2394
2022-04-20 17:50.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.15 [info     ] TD3PlusBC_20220420174947: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00032006787974932043, 'time_algorithm_update': 0.00875191521226314, 'critic_loss': 0.44731289464217877, 'actor_loss': 0.03716920601607066, 'time_step': 0.009152678718343812, 'td_error': 0.8317467695927931, 'init_value': -2.4792118072509766, 'ave_value': 0.5382313065663366} step=2736
2022-04-20 17:50.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.19 [info     ] TD3PlusBC_20220420174947: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003215736813015408, 'time_algorithm_update': 0.008850130421376367, 'critic_loss': 0.5083180979024946, 'actor_loss': 0.04610051868254678, 'time_step': 0.009252491053084882, 'td_error': 0.8462091264208819, 'init_value': -2.738724946975708, 'ave_value': 0.6379933240545729} step=3078
2022-04-20 17:50.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.23 [info     ] TD3PlusBC_20220420174947: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00032023658529359696, 'time_algorithm_update': 0.008726662362528127, 'critic_loss': 0.5655455917163551, 'actor_loss': 0.05933629276982525, 'time_step': 0.009125593810053597, 'td_error': 0.8628531925882366, 'init_value': -3.073730945587158, 'ave_value': 0.6714280751528772} step=3420
2022-04-20 17:50.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.26 [info     ] TD3PlusBC_20220420174947: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003173107292219909, 'time_algorithm_update': 0.008362036699440047, 'critic_loss': 0.6238124252716352, 'actor_loss': 0.04622682739506688, 'time_step': 0.008757481798093918, 'td_error': 0.8832871763339567, 'init_value': -3.307081699371338, 'ave_value': 0.7630394526210462} step=3762
2022-04-20 17:50.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.30 [info     ] TD3PlusBC_20220420174947: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003187328751324213, 'time_algorithm_update': 0.008750942018296983, 'critic_loss': 0.6871438507883869, 'actor_loss': 0.053595656997453396, 'time_step': 0.0091506918968513, 'td_error': 0.9067285937411247, 'init_value': -3.6145567893981934, 'ave_value': 0.829564462326056} step=4104
2022-04-20 17:50.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.34 [info     ] TD3PlusBC_20220420174947: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00031917694716425665, 'time_algorithm_update': 0.008690769909418116, 'critic_loss': 0.7453910685334987, 'actor_loss': 0.06176152834069659, 'time_step': 0.009088586645516736, 'td_error': 0.9337086152442672, 'init_value': -3.9498214721679688, 'ave_value': 0.8676445853773102} step=4446
2022-04-20 17:50.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.37 [info     ] TD3PlusBC_20220420174947: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003179757915742216, 'time_algorithm_update': 0.008412850530524003, 'critic_loss': 0.8025397852455315, 'actor_loss': 0.06058912743863307, 'time_step': 0.008808364645082351, 'td_error': 0.9627309963695613, 'init_value': -4.279751777648926, 'ave_value': 0.9082963908973007} step=4788
2022-04-20 17:50.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.41 [info     ] TD3PlusBC_20220420174947: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003214802658348753, 'time_algorithm_update': 0.00886518913402892, 'critic_loss': 0.8654361450663436, 'actor_loss': 0.056875400354116284, 'time_step': 0.009262747931898687, 'td_error': 0.9999455072128892, 'init_value': -4.574760913848877, 'ave_value': 0.942227876719532} step=5130
2022-04-20 17:50.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.44 [info     ] TD3PlusBC_20220420174947: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003171211097672669, 'time_algorithm_update': 0.008411873850906105, 'critic_loss': 0.9107742580361882, 'actor_loss': 0.05633185614357915, 'time_step': 0.00880718998044555, 'td_error': 1.0328828274385569, 'init_value': -4.801527976989746, 'ave_value': 1.0426986690199103} step=5472
2022-04-20 17:50.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.48 [info     ] TD3PlusBC_20220420174947: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003167418708578188, 'time_algorithm_update': 0.008828155478538826, 'critic_loss': 0.9834338185426436, 'actor_loss': 0.07158138809932603, 'time_step': 0.00922305402699967, 'td_error': 1.070167085619735, 'init_value': -5.177874565124512, 'ave_value': 1.0353976046999807} step=5814
2022-04-20 17:50.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.52 [info     ] TD3PlusBC_20220420174947: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00031991520820305364, 'time_algorithm_update': 0.008685128730640077, 'critic_loss': 1.0400699669201123, 'actor_loss': 0.06792894855403063, 'time_step': 0.009083626563088936, 'td_error': 1.1091557338539104, 'init_value': -5.510349750518799, 'ave_value': 1.0416269282780604} step=6156
2022-04-20 17:50.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.55 [info     ] TD3PlusBC_20220420174947: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00031634799221105743, 'time_algorithm_update': 0.00839911078849034, 'critic_loss': 1.0958594286825225, 'actor_loss': 0.08209106665954255, 'time_step': 0.00879301453194423, 'td_error': 1.1485946152130082, 'init_value': -5.7243547439575195, 'ave_value': 1.1719408998927856} step=6498
2022-04-20 17:50.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.59 [info     ] TD3PlusBC_20220420174947: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003253256368358233, 'time_algorithm_update': 0.008742472581696092, 'critic_loss': 1.1543667834927465, 'actor_loss': 0.09332644878423701, 'time_step': 0.009147077275995622, 'td_error': 1.1957426537871354, 'init_value': -6.022034645080566, 'ave_value': 1.2180134370693567} step=6840
2022-04-20 17:50.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.02 [info     ] TD3PlusBC_20220420174947: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003204101707503112, 'time_algorithm_update': 0.008271651658398367, 'critic_loss': 1.2038005864062504, 'actor_loss': 0.07405325112461347, 'time_step': 0.008669015259770622, 'td_error': 1.2419082635649354, 'init_value': -6.311092376708984, 'ave_value': 1.2418591866124975} step=7182
2022-04-20 17:51.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.06 [info     ] TD3PlusBC_20220420174947: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003191650959483364, 'time_algorithm_update': 0.008743224088211505, 'critic_loss': 1.2646352552879623, 'actor_loss': 0.0661139165143869, 'time_step': 0.009141259026109126, 'td_error': 1.2903614659504397, 'init_value': -6.559327602386475, 'ave_value': 1.3259925453592225} step=7524
2022-04-20 17:51.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.10 [info     ] TD3PlusBC_20220420174947: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00031938748052942823, 'time_algorithm_update': 0.008739714734038414, 'critic_loss': 1.3221562522632337, 'actor_loss': 0.07435297220945358, 'time_step': 0.009138600868091248, 'td_error': 1.333770094936176, 'init_value': -6.92529821395874, 'ave_value': 1.315882846238637} step=7866
2022-04-20 17:51.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.13 [info     ] TD3PlusBC_20220420174947: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00031805387017322563, 'time_algorithm_update': 0.008364446679053948, 'critic_loss': 1.361328541927519, 'actor_loss': 0.0731189741987234, 'time_step': 0.008761306952314767, 'td_error': 1.391077626432174, 'init_value': -7.198092460632324, 'ave_value': 1.3567960158710344} step=8208
2022-04-20 17:51.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.17 [info     ] TD3PlusBC_20220420174947: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032301674112241867, 'time_algorithm_update': 0.008824977261281152, 'critic_loss': 1.4411999042968304, 'actor_loss': 0.0822735188735856, 'time_step': 0.009227979252910056, 'td_error': 1.4476918016428477, 'init_value': -7.482557773590088, 'ave_value': 1.4278275302148986} step=8550
2022-04-20 17:51.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.20 [info     ] TD3PlusBC_20220420174947: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032065068072045756, 'time_algorithm_update': 0.00828328746104101, 'critic_loss': 1.4894628590136243, 'actor_loss': 0.06881695552266132, 'time_step': 0.008682606513040108, 'td_error': 1.4981655266605842, 'init_value': -7.6691694259643555, 'ave_value': 1.5419241809241708} step=8892
2022-04-20 17:51.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.24 [info     ] TD3PlusBC_20220420174947: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003174480639005962, 'time_algorithm_update': 0.00883440037219845, 'critic_loss': 1.5576624488185722, 'actor_loss': 0.07530093550333503, 'time_step': 0.009233393864324915, 'td_error': 1.5586853022011082, 'init_value': -8.014323234558105, 'ave_value': 1.5151294805901125} step=9234
2022-04-20 17:51.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.28 [info     ] TD3PlusBC_20220420174947: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.000317531022412038, 'time_algorithm_update': 0.008774362809476797, 'critic_loss': 1.6109137156902, 'actor_loss': 0.09029703951108525, 'time_step': 0.009170355852584394, 'td_error': 1.6154097279086514, 'init_value': -8.247430801391602, 'ave_value': 1.614821582207087} step=9576
2022-04-20 17:51.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.31 [info     ] TD3PlusBC_20220420174947: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00031744527537920324, 'time_algorithm_update': 0.008404013706229584, 'critic_loss': 1.6633410676878098, 'actor_loss': 0.07818370101134679, 'time_step': 0.008800093193500363, 'td_error': 1.6746507274284699, 'init_value': -8.502828598022461, 'ave_value': 1.648034391864958} step=9918
2022-04-20 17:51.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.35 [info     ] TD3PlusBC_20220420174947: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003213610565453245, 'time_algorithm_update': 0.008768013346264933, 'critic_loss': 1.7273664502372519, 'actor_loss': 0.07504591796743243, 'time_step': 0.009168497303075958, 'td_error': 1.738483559073957, 'init_value': -8.785662651062012, 'ave_value': 1.6874981986569357} step=10260
2022-04-20 17:51.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.38 [info     ] TD3PlusBC_20220420174947: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003215862296478093, 'time_algorithm_update': 0.008609890937805176, 'critic_loss': 1.7843490393712507, 'actor_loss': 0.07551039243389292, 'time_step': 0.009010898439507736, 'td_error': 1.8096628962689674, 'init_value': -9.142576217651367, 'ave_value': 1.704175562714688} step=10602
2022-04-20 17:51.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.42 [info     ] TD3PlusBC_20220420174947: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00032384562910648816, 'time_algorithm_update': 0.00882863161856668, 'critic_loss': 1.8412079733976148, 'actor_loss': 0.07181817101456268, 'time_step': 0.009232265210291099, 'td_error': 1.8736276589798584, 'init_value': -9.2826566696167, 'ave_value': 1.8177498612115748} step=10944
2022-04-20 17:51.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.46 [info     ] TD3PlusBC_20220420174947: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003255800894129346, 'time_algorithm_update': 0.008762277357759531, 'critic_loss': 1.897602243643058, 'actor_loss': 0.08031722739861723, 'time_step': 0.00916965175093266, 'td_error': 1.9417792447768065, 'init_value': -9.667923927307129, 'ave_value': 1.762216179498916} step=11286
2022-04-20 17:51.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.49 [info     ] TD3PlusBC_20220420174947: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003188262905990868, 'time_algorithm_update': 0.008473456254479482, 'critic_loss': 1.9565428435279613, 'actor_loss': 0.09172266329589643, 'time_step': 0.008872479026080573, 'td_error': 2.003042990480008, 'init_value': -9.846197128295898, 'ave_value': 1.8960679316488809} step=11628
2022-04-20 17:51.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.53 [info     ] TD3PlusBC_20220420174947: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003258617300736277, 'time_algorithm_update': 0.008862513547752335, 'critic_loss': 2.006457506956761, 'actor_loss': 0.08248915634395783, 'time_step': 0.00926908902954637, 'td_error': 2.0689458209895286, 'init_value': -10.131202697753906, 'ave_value': 1.893369817794007} step=11970
2022-04-20 17:51.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.57 [info     ] TD3PlusBC_20220420174947: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003201438669572797, 'time_algorithm_update': 0.008705540010106493, 'critic_loss': 2.0628399357461094, 'actor_loss': 0.0899730085978034, 'time_step': 0.009104729395860817, 'td_error': 2.141977193519848, 'init_value': -10.35468578338623, 'ave_value': 1.9890996088393804} step=12312
2022-04-20 17:51.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.00 [info     ] TD3PlusBC_20220420174947: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003142803035981474, 'time_algorithm_update': 0.008727309299491302, 'critic_loss': 2.143542874626249, 'actor_loss': 0.09392192887893894, 'time_step': 0.009120168044553166, 'td_error': 2.2275037780554197, 'init_value': -10.734375, 'ave_value': 1.965527762127495} step=12654
2022-04-20 17:52.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.04 [info     ] TD3PlusBC_20220420174947: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.000318556501154314, 'time_algorithm_update': 0.008672198356940733, 'critic_loss': 2.1856771038289655, 'actor_loss': 0.08116012539344224, 'time_step': 0.00907090741988511, 'td_error': 2.290986385904877, 'init_value': -10.942537307739258, 'ave_value': 2.025108539180595} step=12996
2022-04-20 17:52.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.07 [info     ] TD3PlusBC_20220420174947: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003199953781931024, 'time_algorithm_update': 0.008334491685119986, 'critic_loss': 2.247337243013215, 'actor_loss': 0.09132479355610602, 'time_step': 0.008734841792904145, 'td_error': 2.367728925940063, 'init_value': -11.170408248901367, 'ave_value': 2.1087375454753974} step=13338
2022-04-20 17:52.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.11 [info     ] TD3PlusBC_20220420174947: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003179506948816846, 'time_algorithm_update': 0.008837950857062089, 'critic_loss': 2.324269189733511, 'actor_loss': 0.07941438564867304, 'time_step': 0.009234914305614449, 'td_error': 2.4333911426499113, 'init_value': -11.443877220153809, 'ave_value': 2.1133327191146005} step=13680
2022-04-20 17:52.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.15 [info     ] TD3PlusBC_20220420174947: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00032562122010348134, 'time_algorithm_update': 0.00877285561366388, 'critic_loss': 2.373440993721025, 'actor_loss': 0.10560715059090776, 'time_step': 0.009176542884425112, 'td_error': 2.5131421624337453, 'init_value': -11.717119216918945, 'ave_value': 2.1427446509537766} step=14022
2022-04-20 17:52.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.18 [info     ] TD3PlusBC_20220420174947: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003191274509095309, 'time_algorithm_update': 0.008596793252822251, 'critic_loss': 2.429921068904693, 'actor_loss': 0.09461296650401332, 'time_step': 0.008994905572188528, 'td_error': 2.597122958283897, 'init_value': -11.979047775268555, 'ave_value': 2.1799253945794326} step=14364
2022-04-20 17:52.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.22 [info     ] TD3PlusBC_20220420174947: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032495058070846467, 'time_algorithm_update': 0.008852820647390265, 'critic_loss': 2.4978207235622127, 'actor_loss': 0.06407548121192999, 'time_step': 0.009258816813864904, 'td_error': 2.6554610754490646, 'init_value': -12.183080673217773, 'ave_value': 2.259037614936546} step=14706
2022-04-20 17:52.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.26 [info     ] TD3PlusBC_20220420174947: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032186856743885063, 'time_algorithm_update': 0.008385233711778071, 'critic_loss': 2.5554759694993145, 'actor_loss': 0.09317190853650109, 'time_step': 0.008785097919709502, 'td_error': 2.747340101555465, 'init_value': -12.4887056350708, 'ave_value': 2.2578162940763575} step=15048
2022-04-20 17:52.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.29 [info     ] TD3PlusBC_20220420174947: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00031985525499310407, 'time_algorithm_update': 0.008816350970351905, 'critic_loss': 2.6173715297764506, 'actor_loss': 0.07680239844304776, 'time_step': 0.009215919595015677, 'td_error': 2.8215997043941186, 'init_value': -12.766388893127441, 'ave_value': 2.2681292830676947} step=15390
2022-04-20 17:52.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.33 [info     ] TD3PlusBC_20220420174947: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00032359954209355585, 'time_algorithm_update': 0.008855466257061875, 'critic_loss': 2.703262868680452, 'actor_loss': 0.05741150291603908, 'time_step': 0.009260847554569357, 'td_error': 2.903390948696581, 'init_value': -13.015968322753906, 'ave_value': 2.3256514878169137} step=15732
2022-04-20 17:52.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.37 [info     ] TD3PlusBC_20220420174947: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00031976671943887634, 'time_algorithm_update': 0.008371901790998136, 'critic_loss': 2.757132811591639, 'actor_loss': 0.081762230884262, 'time_step': 0.008769358110706709, 'td_error': 2.9958361248881453, 'init_value': -13.292553901672363, 'ave_value': 2.3422158068354317} step=16074
2022-04-20 17:52.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.40 [info     ] TD3PlusBC_20220420174947: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.000322259657564219, 'time_algorithm_update': 0.00872477244215402, 'critic_loss': 2.8317970604750147, 'actor_loss': 0.09601794919598172, 'time_step': 0.009123929062781975, 'td_error': 3.0569542735824493, 'init_value': -13.464132308959961, 'ave_value': 2.426815674064664} step=16416
2022-04-20 17:52.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.44 [info     ] TD3PlusBC_20220420174947: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003226430792557566, 'time_algorithm_update': 0.008420341196115951, 'critic_loss': 2.9163894524351197, 'actor_loss': 0.09341809447658689, 'time_step': 0.008824011038618478, 'td_error': 3.1386985841835133, 'init_value': -13.654603958129883, 'ave_value': 2.4823681273462403} step=16758
2022-04-20 17:52.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.47 [info     ] TD3PlusBC_20220420174947: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032015641530354816, 'time_algorithm_update': 0.008765341245640091, 'critic_loss': 2.9876872235745715, 'actor_loss': 0.09403440866031144, 'time_step': 0.009164127690053125, 'td_error': 3.2318720673819046, 'init_value': -14.025294303894043, 'ave_value': 2.525611336859329} step=17100
2022-04-20 17:52.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420174947/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-0

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:52.49 [info     ] FQE_20220420175248: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016369927400923046, 'time_algorithm_update': 0.005085441352283887, 'loss': 0.00764199642801268, 'time_step': 0.005322459053858525, 'init_value': -0.2142937034368515, 'ave_value': -0.15326121655871738, 'soft_opc': nan} step=177




2022-04-20 17:52.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.50 [info     ] FQE_20220420175248: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00016324264181535796, 'time_algorithm_update': 0.005110634248808953, 'loss': 0.0057938675038348144, 'time_step': 0.005346383078623626, 'init_value': -0.40006670355796814, 'ave_value': -0.2759332201468783, 'soft_opc': nan} step=354




2022-04-20 17:52.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.51 [info     ] FQE_20220420175248: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.0001652995071842172, 'time_algorithm_update': 0.005000503723230739, 'loss': 0.004867515005802704, 'time_step': 0.005234784325637386, 'init_value': -0.47479453682899475, 'ave_value': -0.3064349846953565, 'soft_opc': nan} step=531




2022-04-20 17:52.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.51 [info     ] FQE_20220420175248: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015891743245097876, 'time_algorithm_update': 0.004412465176339877, 'loss': 0.004607015415647273, 'time_step': 0.004644264609126722, 'init_value': -0.5516754388809204, 'ave_value': -0.34367263271828535, 'soft_opc': nan} step=708




2022-04-20 17:52.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.52 [info     ] FQE_20220420175248: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.0001671718338788566, 'time_algorithm_update': 0.005066644000468281, 'loss': 0.004277633910204184, 'time_step': 0.005304670603261829, 'init_value': -0.6034484505653381, 'ave_value': -0.3572225374032606, 'soft_opc': nan} step=885




2022-04-20 17:52.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.53 [info     ] FQE_20220420175248: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001676446300441936, 'time_algorithm_update': 0.005067914219225867, 'loss': 0.004196701597653877, 'time_step': 0.0053095480816512455, 'init_value': -0.7012274265289307, 'ave_value': -0.4189946134812436, 'soft_opc': nan} step=1062




2022-04-20 17:52.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.54 [info     ] FQE_20220420175248: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001623199484442587, 'time_algorithm_update': 0.00505760295242913, 'loss': 0.003970026393094768, 'time_step': 0.005291530641458802, 'init_value': -0.7923588752746582, 'ave_value': -0.48568510570068346, 'soft_opc': nan} step=1239




2022-04-20 17:52.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.55 [info     ] FQE_20220420175248: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016466103031136895, 'time_algorithm_update': 0.005028567071688377, 'loss': 0.003979940548812771, 'time_step': 0.005266683923322602, 'init_value': -0.8607826828956604, 'ave_value': -0.5284215256940943, 'soft_opc': nan} step=1416




2022-04-20 17:52.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.56 [info     ] FQE_20220420175248: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016305002115540586, 'time_algorithm_update': 0.005033408181141999, 'loss': 0.00401677551881889, 'time_step': 0.005269050598144531, 'init_value': -0.9299576878547668, 'ave_value': -0.5922788808080326, 'soft_opc': nan} step=1593




2022-04-20 17:52.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.58 [info     ] FQE_20220420175248: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00016629089743404066, 'time_algorithm_update': 0.005097533349936964, 'loss': 0.004220040473629231, 'time_step': 0.005339468939829681, 'init_value': -0.999521017074585, 'ave_value': -0.6455741437196776, 'soft_opc': nan} step=1770




2022-04-20 17:52.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:52.59 [info     ] FQE_20220420175248: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.0001655985406563107, 'time_algorithm_update': 0.0049534463613046765, 'loss': 0.004445806634349296, 'time_step': 0.005191771997570318, 'init_value': -1.1508764028549194, 'ave_value': -0.7603059577266196, 'soft_opc': nan} step=1947




2022-04-20 17:52.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.00 [info     ] FQE_20220420175248: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016373968393789174, 'time_algorithm_update': 0.005084747648508536, 'loss': 0.00469750382392904, 'time_step': 0.005324316563579322, 'init_value': -1.235984444618225, 'ave_value': -0.8088816494420842, 'soft_opc': nan} step=2124




2022-04-20 17:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.00 [info     ] FQE_20220420175248: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015998560156525865, 'time_algorithm_update': 0.004129894709182998, 'loss': 0.00503555837542471, 'time_step': 0.004361735898896125, 'init_value': -1.291176438331604, 'ave_value': -0.8449479262593451, 'soft_opc': nan} step=2301




2022-04-20 17:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.01 [info     ] FQE_20220420175248: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016694419128073139, 'time_algorithm_update': 0.0050786457492806815, 'loss': 0.005529296869875095, 'time_step': 0.0053180112677105405, 'init_value': -1.3687987327575684, 'ave_value': -0.8920319501142782, 'soft_opc': nan} step=2478




2022-04-20 17:53.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.02 [info     ] FQE_20220420175248: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.0001602145911610059, 'time_algorithm_update': 0.005115971053387486, 'loss': 0.005947059441336804, 'time_step': 0.005350244920806023, 'init_value': -1.5152004957199097, 'ave_value': -1.0152602416102414, 'soft_opc': nan} step=2655




2022-04-20 17:53.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.03 [info     ] FQE_20220420175248: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016278870361672955, 'time_algorithm_update': 0.005083608088520287, 'loss': 0.006463961664581998, 'time_step': 0.005317853668988761, 'init_value': -1.641442894935608, 'ave_value': -1.1141265660501993, 'soft_opc': nan} step=2832




2022-04-20 17:53.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.04 [info     ] FQE_20220420175248: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001645236365539206, 'time_algorithm_update': 0.005027436940683483, 'loss': 0.0070030601658347975, 'time_step': 0.005263491538958361, 'init_value': -1.7700623273849487, 'ave_value': -1.2263008103110231, 'soft_opc': nan} step=3009




2022-04-20 17:53.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.05 [info     ] FQE_20220420175248: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016313084101272842, 'time_algorithm_update': 0.005054708254539361, 'loss': 0.007616306930033248, 'time_step': 0.0052926001575707045, 'init_value': -1.8295745849609375, 'ave_value': -1.2557649669953406, 'soft_opc': nan} step=3186




2022-04-20 17:53.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.06 [info     ] FQE_20220420175248: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001660875007931122, 'time_algorithm_update': 0.005109951321014577, 'loss': 0.008623595553381885, 'time_step': 0.005351765681121309, 'init_value': -1.8981736898422241, 'ave_value': -1.3035870873489537, 'soft_opc': nan} step=3363




2022-04-20 17:53.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.07 [info     ] FQE_20220420175248: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00016574132240424722, 'time_algorithm_update': 0.0050546072297177076, 'loss': 0.009348677120718706, 'time_step': 0.005291736732094975, 'init_value': -2.0510478019714355, 'ave_value': -1.4383919788586037, 'soft_opc': nan} step=3540




2022-04-20 17:53.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.08 [info     ] FQE_20220420175248: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016560392864679887, 'time_algorithm_update': 0.004994044869633044, 'loss': 0.009870043984025, 'time_step': 0.005231864034792798, 'init_value': -2.2172365188598633, 'ave_value': -1.579639254098719, 'soft_opc': nan} step=3717




2022-04-20 17:53.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.09 [info     ] FQE_20220420175248: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001626580448473914, 'time_algorithm_update': 0.0030072920739987476, 'loss': 0.01092953746941067, 'time_step': 0.0032395373629984883, 'init_value': -2.2406013011932373, 'ave_value': -1.5843658714029412, 'soft_opc': nan} step=3894




2022-04-20 17:53.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.10 [info     ] FQE_20220420175248: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001599276806675108, 'time_algorithm_update': 0.0034791116660597633, 'loss': 0.011594799747769485, 'time_step': 0.003709492710350597, 'init_value': -2.3742294311523438, 'ave_value': -1.7001455917372719, 'soft_opc': nan} step=4071




2022-04-20 17:53.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.10 [info     ] FQE_20220420175248: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00015829511954959502, 'time_algorithm_update': 0.0033041407159492796, 'loss': 0.012359137315366227, 'time_step': 0.0035349999443959382, 'init_value': -2.4509708881378174, 'ave_value': -1.758942173976261, 'soft_opc': nan} step=4248




2022-04-20 17:53.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.11 [info     ] FQE_20220420175248: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015816580777787892, 'time_algorithm_update': 0.0034214480448577366, 'loss': 0.012929027260144341, 'time_step': 0.0036496994859081203, 'init_value': -2.494184970855713, 'ave_value': -1.7776318236245765, 'soft_opc': nan} step=4425




2022-04-20 17:53.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.12 [info     ] FQE_20220420175248: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.000160721062266894, 'time_algorithm_update': 0.0035503341653252726, 'loss': 0.013493201663154625, 'time_step': 0.003779885459080928, 'init_value': -2.523611307144165, 'ave_value': -1.7823807265399745, 'soft_opc': nan} step=4602




2022-04-20 17:53.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.13 [info     ] FQE_20220420175248: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00015789910224871446, 'time_algorithm_update': 0.0034462314541057006, 'loss': 0.013999982099022484, 'time_step': 0.003673716453509142, 'init_value': -2.656937599182129, 'ave_value': -1.8944102865808807, 'soft_opc': nan} step=4779




2022-04-20 17:53.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.13 [info     ] FQE_20220420175248: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.0001569454279323082, 'time_algorithm_update': 0.0034716088893049853, 'loss': 0.014730232989410265, 'time_step': 0.003699625952769134, 'init_value': -2.7780048847198486, 'ave_value': -1.9697849529484908, 'soft_opc': nan} step=4956




2022-04-20 17:53.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.14 [info     ] FQE_20220420175248: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00015882314261743579, 'time_algorithm_update': 0.003400600562661381, 'loss': 0.0154127816269753, 'time_step': 0.0036290661763336697, 'init_value': -2.904963493347168, 'ave_value': -2.0832223360189, 'soft_opc': nan} step=5133




2022-04-20 17:53.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.15 [info     ] FQE_20220420175248: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.0001553276837882349, 'time_algorithm_update': 0.003420239787990764, 'loss': 0.01640095129098722, 'time_step': 0.003647638579546395, 'init_value': -2.981768846511841, 'ave_value': -2.111931317460698, 'soft_opc': nan} step=5310




2022-04-20 17:53.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.15 [info     ] FQE_20220420175248: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016203303795076358, 'time_algorithm_update': 0.003471498435499978, 'loss': 0.017584428822590134, 'time_step': 0.003704665070873196, 'init_value': -3.046750783920288, 'ave_value': -2.136441407771082, 'soft_opc': nan} step=5487




2022-04-20 17:53.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.16 [info     ] FQE_20220420175248: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00015765933667199087, 'time_algorithm_update': 0.0034253960948879437, 'loss': 0.019253707428378616, 'time_step': 0.003652370582192631, 'init_value': -3.1702518463134766, 'ave_value': -2.194183149763772, 'soft_opc': nan} step=5664




2022-04-20 17:53.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.17 [info     ] FQE_20220420175248: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001622728035274872, 'time_algorithm_update': 0.0034950116259903556, 'loss': 0.02028110030956966, 'time_step': 0.0037274225956976078, 'init_value': -3.293259382247925, 'ave_value': -2.2934441866671986, 'soft_opc': nan} step=5841




2022-04-20 17:53.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.18 [info     ] FQE_20220420175248: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015602004056596485, 'time_algorithm_update': 0.0035157176734363967, 'loss': 0.020514547768953957, 'time_step': 0.003741839511246331, 'init_value': -3.468414068222046, 'ave_value': -2.425090475988717, 'soft_opc': nan} step=6018




2022-04-20 17:53.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.18 [info     ] FQE_20220420175248: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016002735849154198, 'time_algorithm_update': 0.0034540898382326977, 'loss': 0.02205588881881371, 'time_step': 0.0036870220960196804, 'init_value': -3.4645299911499023, 'ave_value': -2.400069543038839, 'soft_opc': nan} step=6195




2022-04-20 17:53.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.19 [info     ] FQE_20220420175248: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015920030195160775, 'time_algorithm_update': 0.003541897919218419, 'loss': 0.022549735699217283, 'time_step': 0.0037727625356555657, 'init_value': -3.5799672603607178, 'ave_value': -2.4378390875245044, 'soft_opc': nan} step=6372




2022-04-20 17:53.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.20 [info     ] FQE_20220420175248: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016048937867590263, 'time_algorithm_update': 0.0035278473870228914, 'loss': 0.024056540896831933, 'time_step': 0.0037562214048568812, 'init_value': -3.678617238998413, 'ave_value': -2.516059957724344, 'soft_opc': nan} step=6549




2022-04-20 17:53.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.21 [info     ] FQE_20220420175248: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00015817388976361118, 'time_algorithm_update': 0.0034748362956074, 'loss': 0.025289964265621526, 'time_step': 0.003701585834309206, 'init_value': -3.7236530780792236, 'ave_value': -2.535159310515321, 'soft_opc': nan} step=6726




2022-04-20 17:53.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.21 [info     ] FQE_20220420175248: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.0001624330962445103, 'time_algorithm_update': 0.0034498602656994836, 'loss': 0.02536895961926903, 'time_step': 0.003681724354372186, 'init_value': -3.7759437561035156, 'ave_value': -2.549805533560692, 'soft_opc': nan} step=6903




2022-04-20 17:53.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.22 [info     ] FQE_20220420175248: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016205863090558242, 'time_algorithm_update': 0.0034678534599347304, 'loss': 0.026420028725018103, 'time_step': 0.00370718665042166, 'init_value': -3.8487093448638916, 'ave_value': -2.59099706768928, 'soft_opc': nan} step=7080




2022-04-20 17:53.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.23 [info     ] FQE_20220420175248: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016254220305189575, 'time_algorithm_update': 0.0035133617745954438, 'loss': 0.027498283639871172, 'time_step': 0.003750095259671831, 'init_value': -4.003634452819824, 'ave_value': -2.6743472810328432, 'soft_opc': nan} step=7257




2022-04-20 17:53.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.23 [info     ] FQE_20220420175248: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016317663893187788, 'time_algorithm_update': 0.003507502334939558, 'loss': 0.028711529405219323, 'time_step': 0.003746536491954394, 'init_value': -4.080333232879639, 'ave_value': -2.6921755445156212, 'soft_opc': nan} step=7434




2022-04-20 17:53.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.24 [info     ] FQE_20220420175248: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016454518851587329, 'time_algorithm_update': 0.0035463281943973173, 'loss': 0.027672207964279053, 'time_step': 0.003785276143564343, 'init_value': -4.18510627746582, 'ave_value': -2.7695843668537097, 'soft_opc': nan} step=7611




2022-04-20 17:53.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.25 [info     ] FQE_20220420175248: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016116018349167992, 'time_algorithm_update': 0.003415478151396843, 'loss': 0.030799567901350473, 'time_step': 0.0036518061901889953, 'init_value': -4.173020839691162, 'ave_value': -2.7373828306732637, 'soft_opc': nan} step=7788




2022-04-20 17:53.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.26 [info     ] FQE_20220420175248: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016383532076905675, 'time_algorithm_update': 0.003556838816842117, 'loss': 0.03204283181980442, 'time_step': 0.0037932207355391508, 'init_value': -4.447706699371338, 'ave_value': -2.8918591565343443, 'soft_opc': nan} step=7965




2022-04-20 17:53.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.26 [info     ] FQE_20220420175248: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.0001614726869399938, 'time_algorithm_update': 0.0034727551842813438, 'loss': 0.033352331366221614, 'time_step': 0.0037045303711109915, 'init_value': -4.53856086730957, 'ave_value': -2.9598464933956774, 'soft_opc': nan} step=8142




2022-04-20 17:53.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.27 [info     ] FQE_20220420175248: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00016114806051308153, 'time_algorithm_update': 0.0035451428364899197, 'loss': 0.034899018729884324, 'time_step': 0.0037809711391642944, 'init_value': -4.631394863128662, 'ave_value': -3.0039373228969697, 'soft_opc': nan} step=8319




2022-04-20 17:53.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.28 [info     ] FQE_20220420175248: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016447918563239317, 'time_algorithm_update': 0.0035830635135456665, 'loss': 0.03639491593023225, 'time_step': 0.0038263716940152444, 'init_value': -4.710620403289795, 'ave_value': -3.030548287910205, 'soft_opc': nan} step=8496




2022-04-20 17:53.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.28 [info     ] FQE_20220420175248: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00016461119139935337, 'time_algorithm_update': 0.003526769788925257, 'loss': 0.03737405473813625, 'time_step': 0.003760774256819386, 'init_value': -4.7663044929504395, 'ave_value': -3.072013529953365, 'soft_opc': nan} step=8673




2022-04-20 17:53.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:53.29 [info     ] FQE_20220420175248: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016300287623863437, 'time_algorithm_update': 0.003561887363929533, 'loss': 0.03861292573560219, 'time_step': 0.0037933082903845836, 'init_value': -4.85946798324585, 'ave_value': -3.0870446973919377, 'soft_opc': nan} step=8850




2022-04-20 17:53.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175248/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:53.30 [info     ] Directory is created at d3rlpy_logs/FQE_20220420175330
2022-04-20 17:53.30 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:53.30 [debug    ] Building models...
2022-04-20 17:53.30 [debug    ] Models have been built.
2022-04-20 17:53.30 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420175330/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:53.31 [info     ] FQE_20220420175330: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015878330829531648, 'time_algorithm_update': 0.0034574900948724082, 'loss': 0.03292717683388917, 'time_step': 0.0036870955034743907, 'init_value': -1.1524620056152344, 'ave_value': -1.1667301538425523, 'soft_opc': nan} step=344




2022-04-20 17:53.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.32 [info     ] FQE_20220420175330: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001616228458493255, 'time_algorithm_update': 0.003494414479233498, 'loss': 0.026432593030920035, 'time_step': 0.0037273563617883725, 'init_value': -1.900146722793579, 'ave_value': -1.919496601135344, 'soft_opc': nan} step=688




2022-04-20 17:53.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.34 [info     ] FQE_20220420175330: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016201928604480832, 'time_algorithm_update': 0.003494581510854322, 'loss': 0.029329382107853023, 'time_step': 0.003732348597326944, 'init_value': -2.7957706451416016, 'ave_value': -2.85117957932992, 'soft_opc': nan} step=1032




2022-04-20 17:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.35 [info     ] FQE_20220420175330: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016246770703515342, 'time_algorithm_update': 0.0035311149996380474, 'loss': 0.03234909093927939, 'time_step': 0.003767048203667929, 'init_value': -3.3281455039978027, 'ave_value': -3.4207622580163113, 'soft_opc': nan} step=1376




2022-04-20 17:53.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.37 [info     ] FQE_20220420175330: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016298197036565735, 'time_algorithm_update': 0.0035195121931475264, 'loss': 0.04158936947215955, 'time_step': 0.0037607827851938646, 'init_value': -4.074094772338867, 'ave_value': -4.190667187147312, 'soft_opc': nan} step=1720




2022-04-20 17:53.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.38 [info     ] FQE_20220420175330: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016425168791482615, 'time_algorithm_update': 0.003444216279096382, 'loss': 0.049607469288762224, 'time_step': 0.0036819889101871225, 'init_value': -4.423681735992432, 'ave_value': -4.560390127295847, 'soft_opc': nan} step=2064




2022-04-20 17:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.39 [info     ] FQE_20220420175330: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001634407875149749, 'time_algorithm_update': 0.0034903107687484385, 'loss': 0.06045314098361793, 'time_step': 0.0037293198496796366, 'init_value': -5.168559551239014, 'ave_value': -5.356442681022055, 'soft_opc': nan} step=2408




2022-04-20 17:53.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.41 [info     ] FQE_20220420175330: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016672042913215104, 'time_algorithm_update': 0.003458323866821999, 'loss': 0.07322204227526781, 'time_step': 0.003700002681377322, 'init_value': -5.7394208908081055, 'ave_value': -5.869993779882117, 'soft_opc': nan} step=2752




2022-04-20 17:53.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.42 [info     ] FQE_20220420175330: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016617705655652424, 'time_algorithm_update': 0.0034863671591115553, 'loss': 0.08816932661047336, 'time_step': 0.0037255626778269924, 'init_value': -6.141883373260498, 'ave_value': -6.265815858959078, 'soft_opc': nan} step=3096




2022-04-20 17:53.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.43 [info     ] FQE_20220420175330: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016249404397121695, 'time_algorithm_update': 0.002532624228056087, 'loss': 0.10778226588072992, 'time_step': 0.0027691950631696122, 'init_value': -6.660025119781494, 'ave_value': -6.7754455291204625, 'soft_opc': nan} step=3440




2022-04-20 17:53.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.45 [info     ] FQE_20220420175330: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016582705253778503, 'time_algorithm_update': 0.003566707982573398, 'loss': 0.12178786058888533, 'time_step': 0.0038041611050450524, 'init_value': -7.076033592224121, 'ave_value': -7.182304656894894, 'soft_opc': nan} step=3784




2022-04-20 17:53.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.46 [info     ] FQE_20220420175330: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016604467879894168, 'time_algorithm_update': 0.0035333175991856774, 'loss': 0.14364655405759466, 'time_step': 0.0037718512291132016, 'init_value': -7.722264766693115, 'ave_value': -7.79713993217494, 'soft_opc': nan} step=4128




2022-04-20 17:53.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.47 [info     ] FQE_20220420175330: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016210037608479344, 'time_algorithm_update': 0.0035735167736230893, 'loss': 0.16505343862267774, 'time_step': 0.0038055923095969267, 'init_value': -8.095975875854492, 'ave_value': -8.144602555378869, 'soft_opc': nan} step=4472




2022-04-20 17:53.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.49 [info     ] FQE_20220420175330: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001665443875068842, 'time_algorithm_update': 0.003517722667649735, 'loss': 0.1830928778427458, 'time_step': 0.003757162149562392, 'init_value': -8.690559387207031, 'ave_value': -8.710532656674449, 'soft_opc': nan} step=4816




2022-04-20 17:53.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.50 [info     ] FQE_20220420175330: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.000163636235303657, 'time_algorithm_update': 0.003550495519194492, 'loss': 0.20318374821244803, 'time_step': 0.003789650146351304, 'init_value': -9.050601959228516, 'ave_value': -9.070466398696105, 'soft_opc': nan} step=5160




2022-04-20 17:53.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.52 [info     ] FQE_20220420175330: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001677912335063136, 'time_algorithm_update': 0.00357302330261053, 'loss': 0.22145222711680068, 'time_step': 0.0038126353607621302, 'init_value': -9.747251510620117, 'ave_value': -9.712464543673638, 'soft_opc': nan} step=5504




2022-04-20 17:53.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.53 [info     ] FQE_20220420175330: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016226602155108784, 'time_algorithm_update': 0.003519971010296844, 'loss': 0.24390260653765222, 'time_step': 0.0037534056707870128, 'init_value': -10.10723876953125, 'ave_value': -10.052712750193235, 'soft_opc': nan} step=5848




2022-04-20 17:53.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.54 [info     ] FQE_20220420175330: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016559348549953726, 'time_algorithm_update': 0.0035106754580209423, 'loss': 0.25828516259139706, 'time_step': 0.003749896620595178, 'init_value': -10.40109634399414, 'ave_value': -10.333152882237961, 'soft_opc': nan} step=6192




2022-04-20 17:53.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.56 [info     ] FQE_20220420175330: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016287939493046252, 'time_algorithm_update': 0.0035051710383836613, 'loss': 0.27719827687779314, 'time_step': 0.003743658232134442, 'init_value': -10.78128433227539, 'ave_value': -10.67577701809707, 'soft_opc': nan} step=6536




2022-04-20 17:53.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.57 [info     ] FQE_20220420175330: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016611329344815986, 'time_algorithm_update': 0.003595214250475861, 'loss': 0.2985871505360444, 'time_step': 0.003834365412246349, 'init_value': -11.104026794433594, 'ave_value': -10.992188861768122, 'soft_opc': nan} step=6880




2022-04-20 17:53.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:53.59 [info     ] FQE_20220420175330: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016305058501487555, 'time_algorithm_update': 0.0034717813480732054, 'loss': 0.31034228126626723, 'time_step': 0.003708963477334311, 'init_value': -11.442296981811523, 'ave_value': -11.299737375456345, 'soft_opc': nan} step=7224




2022-04-20 17:53.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.00 [info     ] FQE_20220420175330: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001669054807618607, 'time_algorithm_update': 0.0035342920658200288, 'loss': 0.33239666403942675, 'time_step': 0.003775174534598062, 'init_value': -11.776673316955566, 'ave_value': -11.591314015004297, 'soft_opc': nan} step=7568




2022-04-20 17:54.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.01 [info     ] FQE_20220420175330: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016690963922544967, 'time_algorithm_update': 0.003468227940936421, 'loss': 0.34968219112150023, 'time_step': 0.0037106920120327973, 'init_value': -12.148771286010742, 'ave_value': -11.86054364706742, 'soft_opc': nan} step=7912




2022-04-20 17:54.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.03 [info     ] FQE_20220420175330: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016485674436702284, 'time_algorithm_update': 0.003524106602336085, 'loss': 0.36040065698542223, 'time_step': 0.0037656391775885293, 'init_value': -12.242738723754883, 'ave_value': -11.858899526583736, 'soft_opc': nan} step=8256




2022-04-20 17:54.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.04 [info     ] FQE_20220420175330: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016050629837568417, 'time_algorithm_update': 0.003457486629486084, 'loss': 0.37561150879577493, 'time_step': 0.003693083691042523, 'init_value': -12.537420272827148, 'ave_value': -12.174582159650923, 'soft_opc': nan} step=8600




2022-04-20 17:54.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.06 [info     ] FQE_20220420175330: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016721182091291562, 'time_algorithm_update': 0.003511965274810791, 'loss': 0.3861507100356353, 'time_step': 0.0037555770818577254, 'init_value': -13.07107162475586, 'ave_value': -12.708816033279573, 'soft_opc': nan} step=8944




2022-04-20 17:54.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.07 [info     ] FQE_20220420175330: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016378316768380098, 'time_algorithm_update': 0.003463814424913983, 'loss': 0.395744446009874, 'time_step': 0.0037044723366582116, 'init_value': -13.223043441772461, 'ave_value': -12.853102592466113, 'soft_opc': nan} step=9288




2022-04-20 17:54.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.08 [info     ] FQE_20220420175330: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016720835552659145, 'time_algorithm_update': 0.0035317657991897227, 'loss': 0.4032153831036805, 'time_step': 0.0037743726442026537, 'init_value': -13.348247528076172, 'ave_value': -13.090210612319611, 'soft_opc': nan} step=9632




2022-04-20 17:54.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.10 [info     ] FQE_20220420175330: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001643930756768515, 'time_algorithm_update': 0.0034739167191261467, 'loss': 0.40366642798167157, 'time_step': 0.0037122202474017475, 'init_value': -13.541902542114258, 'ave_value': -13.17862522426892, 'soft_opc': nan} step=9976




2022-04-20 17:54.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.11 [info     ] FQE_20220420175330: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016629765200060467, 'time_algorithm_update': 0.0034629148106242336, 'loss': 0.4189181106630713, 'time_step': 0.003706139880557393, 'init_value': -13.683664321899414, 'ave_value': -13.31741753748811, 'soft_opc': nan} step=10320




2022-04-20 17:54.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.13 [info     ] FQE_20220420175330: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016374088997064636, 'time_algorithm_update': 0.003513560045597165, 'loss': 0.42039744661536155, 'time_step': 0.003751560699108035, 'init_value': -13.536888122558594, 'ave_value': -13.254480318872778, 'soft_opc': nan} step=10664




2022-04-20 17:54.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.14 [info     ] FQE_20220420175330: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016097759091576866, 'time_algorithm_update': 0.0035481272741805675, 'loss': 0.4316247588640815, 'time_step': 0.003783212844715562, 'init_value': -13.565315246582031, 'ave_value': -13.409571206059542, 'soft_opc': nan} step=11008




2022-04-20 17:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.15 [info     ] FQE_20220420175330: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016560873319936353, 'time_algorithm_update': 0.003515712050504463, 'loss': 0.42382227639711006, 'time_step': 0.003755105789317641, 'init_value': -13.220976829528809, 'ave_value': -12.911569411904962, 'soft_opc': nan} step=11352




2022-04-20 17:54.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.17 [info     ] FQE_20220420175330: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016647646593493084, 'time_algorithm_update': 0.0034567588983580124, 'loss': 0.4199078604903852, 'time_step': 0.0036995175272919413, 'init_value': -13.426669120788574, 'ave_value': -13.120387980059997, 'soft_opc': nan} step=11696




2022-04-20 17:54.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.18 [info     ] FQE_20220420175330: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016307692195093908, 'time_algorithm_update': 0.0034558980963950935, 'loss': 0.42758996055331516, 'time_step': 0.0036938363729521286, 'init_value': -13.375083923339844, 'ave_value': -13.188453814467868, 'soft_opc': nan} step=12040




2022-04-20 17:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.20 [info     ] FQE_20220420175330: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016606616419415142, 'time_algorithm_update': 0.003502181796140449, 'loss': 0.4273606759853401, 'time_step': 0.003742841094039207, 'init_value': -13.066327095031738, 'ave_value': -12.87088596321441, 'soft_opc': nan} step=12384




2022-04-20 17:54.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.21 [info     ] FQE_20220420175330: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001630887042644412, 'time_algorithm_update': 0.003444565590037856, 'loss': 0.4299680137259582, 'time_step': 0.0036800385907638906, 'init_value': -13.557422637939453, 'ave_value': -13.18756867079316, 'soft_opc': nan} step=12728




2022-04-20 17:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.22 [info     ] FQE_20220420175330: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016352049140043036, 'time_algorithm_update': 0.0035515302835508835, 'loss': 0.4385659259657354, 'time_step': 0.0037892890530963276, 'init_value': -13.692452430725098, 'ave_value': -13.44089983548667, 'soft_opc': nan} step=13072




2022-04-20 17:54.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.24 [info     ] FQE_20220420175330: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016690478768459585, 'time_algorithm_update': 0.003481716610664545, 'loss': 0.4343931083296707, 'time_step': 0.0037244073180265204, 'init_value': -13.399026870727539, 'ave_value': -13.131076142181819, 'soft_opc': nan} step=13416




2022-04-20 17:54.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.25 [info     ] FQE_20220420175330: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016375890997953193, 'time_algorithm_update': 0.003487548855848091, 'loss': 0.44318414903462455, 'time_step': 0.003726435955180678, 'init_value': -13.521183967590332, 'ave_value': -13.20932334503075, 'soft_opc': nan} step=13760




2022-04-20 17:54.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.27 [info     ] FQE_20220420175330: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001634019751881444, 'time_algorithm_update': 0.0034462865008864293, 'loss': 0.4554976385714876, 'time_step': 0.0036843467590420747, 'init_value': -13.989723205566406, 'ave_value': -13.818031360598297, 'soft_opc': nan} step=14104




2022-04-20 17:54.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.28 [info     ] FQE_20220420175330: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001647243666094403, 'time_algorithm_update': 0.003481026998786039, 'loss': 0.4651948134360705, 'time_step': 0.003720977971720141, 'init_value': -13.93490982055664, 'ave_value': -13.768741713544808, 'soft_opc': nan} step=14448




2022-04-20 17:54.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.29 [info     ] FQE_20220420175330: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016517902529516884, 'time_algorithm_update': 0.003474683262581049, 'loss': 0.4654474749154043, 'time_step': 0.003713295210239499, 'init_value': -14.0029878616333, 'ave_value': -13.921687497050913, 'soft_opc': nan} step=14792




2022-04-20 17:54.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.31 [info     ] FQE_20220420175330: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016680637071298998, 'time_algorithm_update': 0.0034522247868914936, 'loss': 0.47330932269349346, 'time_step': 0.0036940498407496965, 'init_value': -13.93118953704834, 'ave_value': -13.859318271708919, 'soft_opc': nan} step=15136




2022-04-20 17:54.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.32 [info     ] FQE_20220420175330: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016408812168032625, 'time_algorithm_update': 0.0034794003464454827, 'loss': 0.4676499339524483, 'time_step': 0.0037190581476965615, 'init_value': -13.716115951538086, 'ave_value': -13.76031794098986, 'soft_opc': nan} step=15480




2022-04-20 17:54.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.34 [info     ] FQE_20220420175330: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016545764235563056, 'time_algorithm_update': 0.0034682736840359, 'loss': 0.47531044168863446, 'time_step': 0.003708239904669828, 'init_value': -13.707298278808594, 'ave_value': -13.621943486515466, 'soft_opc': nan} step=15824




2022-04-20 17:54.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.35 [info     ] FQE_20220420175330: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016480199126310126, 'time_algorithm_update': 0.0035166269124940384, 'loss': 0.4795361889991909, 'time_step': 0.0037544626136158787, 'init_value': -13.967390060424805, 'ave_value': -13.842218235604935, 'soft_opc': nan} step=16168




2022-04-20 17:54.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.36 [info     ] FQE_20220420175330: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016807331595309946, 'time_algorithm_update': 0.0034928398076878035, 'loss': 0.4947709742538241, 'time_step': 0.0037345470384109853, 'init_value': -14.112386703491211, 'ave_value': -13.992778788157949, 'soft_opc': nan} step=16512




2022-04-20 17:54.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.38 [info     ] FQE_20220420175330: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016196176063182742, 'time_algorithm_update': 0.003531634114509405, 'loss': 0.4938340751172671, 'time_step': 0.0037651214488717013, 'init_value': -14.060691833496094, 'ave_value': -14.003307361726288, 'soft_opc': nan} step=16856




2022-04-20 17:54.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:54.39 [info     ] FQE_20220420175330: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016588111256444178, 'time_algorithm_update': 0.0034542125324870266, 'loss': 0.5073621183644633, 'time_step': 0.003693728252898815, 'init_value': -14.277036666870117, 'ave_value': -14.20552784714613, 'soft_opc': nan} step=17200




2022-04-20 17:54.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175330/model_17200.pt
search iteration:  29
using hyper params:  [0.00615986784353422, 0.002744028276777206, 8.245166229083582e-05, 1]
2022-04-20 17:54.39 [debug    ] RoundIterator is selected.
2022-04-20 17:54.39 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420175439
2022-04-20 17:54.39 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 17:54.39 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:54.39 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:54.39 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.006159867843534

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.42 [info     ] TD3PlusBC_20220420175439: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00031871962965580457, 'time_algorithm_update': 0.0067046197534304615, 'critic_loss': 0.5361887983847082, 'actor_loss': 0.02583106154063989, 'time_step': 0.007100888163025616, 'td_error': 0.8089574410730814, 'init_value': -0.556118369102478, 'ave_value': 0.1492397749237926} step=342
2022-04-20 17:54.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.45 [info     ] TD3PlusBC_20220420175439: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003156034569991262, 'time_algorithm_update': 0.0067437810507434155, 'critic_loss': 0.19555509563165102, 'actor_loss': -0.030464248306918563, 'time_step': 0.0071377998207047665, 'td_error': 0.8047836107873927, 'init_value': -0.7714181542396545, 'ave_value': 0.23264173362855087} step=684
2022-04-20 17:54.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.48 [info     ] TD3PlusBC_20220420175439: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00031498440524988007, 'time_algorithm_update': 0.0067470345580786985, 'critic_loss': 0.22505899022022882, 'actor_loss': -0.013851801837571183, 'time_step': 0.007141960991753472, 'td_error': 0.8045567809791833, 'init_value': -1.1290251016616821, 'ave_value': 0.23209360988861} step=1026
2022-04-20 17:54.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.51 [info     ] TD3PlusBC_20220420175439: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00032048197517618103, 'time_algorithm_update': 0.006824613314623024, 'critic_loss': 0.2670707439034306, 'actor_loss': 0.015367275920876285, 'time_step': 0.007224033450522618, 'td_error': 0.8050499584578761, 'init_value': -1.4022648334503174, 'ave_value': 0.30092356901269757} step=1368
2022-04-20 17:54.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.54 [info     ] TD3PlusBC_20220420175439: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003202498307702137, 'time_algorithm_update': 0.006779256619905171, 'critic_loss': 0.32499307387492116, 'actor_loss': 0.01876217560374249, 'time_step': 0.007178547786690338, 'td_error': 0.8039408560286363, 'init_value': -1.680135726928711, 'ave_value': 0.38350174848713936} step=1710
2022-04-20 17:54.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:54.57 [info     ] TD3PlusBC_20220420175439: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003222087670487967, 'time_algorithm_update': 0.006723228253816303, 'critic_loss': 0.3861783881895026, 'actor_loss': 0.0010195107182912658, 'time_step': 0.007125684392382527, 'td_error': 0.8088006153723938, 'init_value': -1.94742751121521, 'ave_value': 0.47148539928457206} step=2052
2022-04-20 17:54.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.00 [info     ] TD3PlusBC_20220420175439: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032354795444778533, 'time_algorithm_update': 0.00890088987629316, 'critic_loss': 0.46726548066090423, 'actor_loss': 0.02278354750913486, 'time_step': 0.009303038580375806, 'td_error': 0.8183848627554612, 'init_value': -2.2270407676696777, 'ave_value': 0.5428569867524008} step=2394
2022-04-20 17:55.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.04 [info     ] TD3PlusBC_20220420175439: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003194355825234575, 'time_algorithm_update': 0.00888427656296401, 'critic_loss': 0.5041509721624223, 'actor_loss': 0.04425550012560616, 'time_step': 0.009282262004606905, 'td_error': 0.8295839106645007, 'init_value': -2.5432159900665283, 'ave_value': 0.6144509968758133} step=2736
2022-04-20 17:55.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.08 [info     ] TD3PlusBC_20220420175439: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003227964479323716, 'time_algorithm_update': 0.008407256756609643, 'critic_loss': 0.5951785452184621, 'actor_loss': 0.042407522272122536, 'time_step': 0.008809459836859452, 'td_error': 0.8423814441078268, 'init_value': -2.8962578773498535, 'ave_value': 0.6191968940722395} step=3078
2022-04-20 17:55.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.11 [info     ] TD3PlusBC_20220420175439: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00032480418333533215, 'time_algorithm_update': 0.008922122375309816, 'critic_loss': 0.6849059697828794, 'actor_loss': 0.03069126074425658, 'time_step': 0.009328234265422264, 'td_error': 0.8609383046719743, 'init_value': -3.1618762016296387, 'ave_value': 0.6866219039308746} step=3420
2022-04-20 17:55.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.15 [info     ] TD3PlusBC_20220420175439: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003246619687442891, 'time_algorithm_update': 0.008500239305328904, 'critic_loss': 0.7554479001296891, 'actor_loss': 0.060681141542586664, 'time_step': 0.00890506917273092, 'td_error': 0.8773091014663366, 'init_value': -3.516740322113037, 'ave_value': 0.7561423129885801} step=3762
2022-04-20 17:55.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.19 [info     ] TD3PlusBC_20220420175439: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003248146402905559, 'time_algorithm_update': 0.008879409198872527, 'critic_loss': 0.8232994063904411, 'actor_loss': 0.04008888864987775, 'time_step': 0.00928418817575912, 'td_error': 0.9026420873396261, 'init_value': -3.7969672679901123, 'ave_value': 0.7994988853337982} step=4104
2022-04-20 17:55.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.22 [info     ] TD3PlusBC_20220420175439: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00032333463256122073, 'time_algorithm_update': 0.008895680918331034, 'critic_loss': 0.9342742052953146, 'actor_loss': 0.054023214882751656, 'time_step': 0.00929967771496689, 'td_error': 0.9249486127724015, 'init_value': -4.088960647583008, 'ave_value': 0.8694414994699109} step=4446
2022-04-20 17:55.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.26 [info     ] TD3PlusBC_20220420175439: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00032563516271044636, 'time_algorithm_update': 0.00843080024273075, 'critic_loss': 1.0513236585416292, 'actor_loss': 0.042483486954057424, 'time_step': 0.008836409501862107, 'td_error': 0.9511769079147809, 'init_value': -4.450252532958984, 'ave_value': 0.922753650410361} step=4788
2022-04-20 17:55.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.30 [info     ] TD3PlusBC_20220420175439: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00032999153025666173, 'time_algorithm_update': 0.008948329596491585, 'critic_loss': 1.1598582152734722, 'actor_loss': 0.05181059467862224, 'time_step': 0.00935743426718907, 'td_error': 0.970249071022411, 'init_value': -4.894768714904785, 'ave_value': 0.9225603943876503} step=5130
2022-04-20 17:55.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.33 [info     ] TD3PlusBC_20220420175439: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00032107174745080066, 'time_algorithm_update': 0.008774553823192217, 'critic_loss': 1.3023347916484576, 'actor_loss': 0.03977933203616337, 'time_step': 0.00917408619707788, 'td_error': 0.9994348233516589, 'init_value': -5.0323004722595215, 'ave_value': 1.0591028063027776} step=5472
2022-04-20 17:55.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.37 [info     ] TD3PlusBC_20220420175439: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003297565973293014, 'time_algorithm_update': 0.008793497643275567, 'critic_loss': 1.456451343731922, 'actor_loss': 0.05267372703430248, 'time_step': 0.00920511337748745, 'td_error': 1.0234928205007177, 'init_value': -5.419778347015381, 'ave_value': 1.0975706036227362} step=5814
2022-04-20 17:55.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.40 [info     ] TD3PlusBC_20220420175439: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003257885313870614, 'time_algorithm_update': 0.008758630668907835, 'critic_loss': 1.5898332655081275, 'actor_loss': 0.07327105460024019, 'time_step': 0.009162989276194433, 'td_error': 1.0536251219021862, 'init_value': -5.720913887023926, 'ave_value': 1.1387496705968059} step=6156
2022-04-20 17:55.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.44 [info     ] TD3PlusBC_20220420175439: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003274923179581849, 'time_algorithm_update': 0.008501446037961725, 'critic_loss': 1.7610823282024317, 'actor_loss': 0.06612431138143902, 'time_step': 0.008910119184973643, 'td_error': 1.0838931747651204, 'init_value': -6.20685338973999, 'ave_value': 1.1245368438133516} step=6498
2022-04-20 17:55.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.48 [info     ] TD3PlusBC_20220420175439: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00032497776879204647, 'time_algorithm_update': 0.008954597495452703, 'critic_loss': 1.9442120808781238, 'actor_loss': 0.07893833036572613, 'time_step': 0.009357764009843794, 'td_error': 1.1200490008531283, 'init_value': -6.481422424316406, 'ave_value': 1.1984807657234806} step=6840
2022-04-20 17:55.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.51 [info     ] TD3PlusBC_20220420175439: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00032972871211537144, 'time_algorithm_update': 0.009024873811599107, 'critic_loss': 2.145571100258688, 'actor_loss': 0.04573101145133637, 'time_step': 0.009435220768577173, 'td_error': 1.1296220771181966, 'init_value': -7.120825290679932, 'ave_value': 1.123400758912468} step=7182
2022-04-20 17:55.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.55 [info     ] TD3PlusBC_20220420175439: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00029286236790885704, 'time_algorithm_update': 0.008210789390474732, 'critic_loss': 2.3295073638185424, 'actor_loss': 0.07593252758185069, 'time_step': 0.008574336592914069, 'td_error': 1.1966745548358861, 'init_value': -7.0267815589904785, 'ave_value': 1.3474416921041101} step=7524
2022-04-20 17:55.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.58 [info     ] TD3PlusBC_20220420175439: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0002942099208720246, 'time_algorithm_update': 0.008490275918391714, 'critic_loss': 2.5487766555178237, 'actor_loss': 0.07363158427396713, 'time_step': 0.008856000258908635, 'td_error': 1.2170939732139254, 'init_value': -7.500984191894531, 'ave_value': 1.3447101765020273} step=7866
2022-04-20 17:55.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.02 [info     ] TD3PlusBC_20220420175439: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00032729642433032655, 'time_algorithm_update': 0.008542872311776145, 'critic_loss': 2.7176453269015974, 'actor_loss': 0.07453468749136255, 'time_step': 0.008950342211806983, 'td_error': 1.2646081818881516, 'init_value': -7.797963619232178, 'ave_value': 1.3762208397996503} step=8208
2022-04-20 17:56.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.06 [info     ] TD3PlusBC_20220420175439: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032515205137910897, 'time_algorithm_update': 0.00877597108919021, 'critic_loss': 2.9029671821677896, 'actor_loss': 0.07527246530990155, 'time_step': 0.009180733334948446, 'td_error': 1.2965665506126045, 'init_value': -8.107353210449219, 'ave_value': 1.4411705206215448} step=8550
2022-04-20 17:56.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.09 [info     ] TD3PlusBC_20220420175439: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032726296207361054, 'time_algorithm_update': 0.008852314530757436, 'critic_loss': 3.1117877357187327, 'actor_loss': 0.06716915838725386, 'time_step': 0.009260643295377319, 'td_error': 1.3318755482512623, 'init_value': -8.439275741577148, 'ave_value': 1.5253040349676468} step=8892
2022-04-20 17:56.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.13 [info     ] TD3PlusBC_20220420175439: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00031115297685589704, 'time_algorithm_update': 0.008352558515225237, 'critic_loss': 3.2909506880051906, 'actor_loss': 0.09147713459723177, 'time_step': 0.008738870509186683, 'td_error': 1.3693274918109615, 'init_value': -8.850250244140625, 'ave_value': 1.4844077181507338} step=9234
2022-04-20 17:56.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.16 [info     ] TD3PlusBC_20220420175439: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0002978607925058108, 'time_algorithm_update': 0.008451462489122536, 'critic_loss': 3.5230686729065854, 'actor_loss': 0.09782723668548796, 'time_step': 0.008821401679724977, 'td_error': 1.39088440435028, 'init_value': -9.241827964782715, 'ave_value': 1.5779885499075323} step=9576
2022-04-20 17:56.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.20 [info     ] TD3PlusBC_20220420175439: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00032768542306465013, 'time_algorithm_update': 0.008505527736150731, 'critic_loss': 3.764739272625823, 'actor_loss': 0.0757578464758675, 'time_step': 0.008913265334235298, 'td_error': 1.4506162357967336, 'init_value': -9.553468704223633, 'ave_value': 1.5668785974442085} step=9918
2022-04-20 17:56.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.24 [info     ] TD3PlusBC_20220420175439: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003272734190288343, 'time_algorithm_update': 0.008977559574863367, 'critic_loss': 3.9903615549293874, 'actor_loss': 0.07833825276173347, 'time_step': 0.009385267196342958, 'td_error': 1.4778315231007175, 'init_value': -9.99327278137207, 'ave_value': 1.5541913702968206} step=10260
2022-04-20 17:56.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.27 [info     ] TD3PlusBC_20220420175439: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000327955212509423, 'time_algorithm_update': 0.0089070295032702, 'critic_loss': 4.142786956669991, 'actor_loss': 0.07136248753607621, 'time_step': 0.009315830225135848, 'td_error': 1.4894430315621159, 'init_value': -10.524060249328613, 'ave_value': 1.636049693083592} step=10602
2022-04-20 17:56.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.31 [info     ] TD3PlusBC_20220420175439: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003301588415402418, 'time_algorithm_update': 0.008657859082807574, 'critic_loss': 4.449947916276273, 'actor_loss': 0.0810220925605785, 'time_step': 0.009066563600685165, 'td_error': 1.5596989278049607, 'init_value': -10.70362663269043, 'ave_value': 1.6987486332794233} step=10944
2022-04-20 17:56.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.35 [info     ] TD3PlusBC_20220420175439: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00032760037316216365, 'time_algorithm_update': 0.008890200079533091, 'critic_loss': 4.615605204426057, 'actor_loss': 0.07446088525330463, 'time_step': 0.009295233508996796, 'td_error': 1.613188600220121, 'init_value': -10.97137451171875, 'ave_value': 1.715806529565404} step=11286
2022-04-20 17:56.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.38 [info     ] TD3PlusBC_20220420175439: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00033487423121580603, 'time_algorithm_update': 0.008476009842945121, 'critic_loss': 4.81015413633564, 'actor_loss': 0.08570362642755983, 'time_step': 0.008890743144074379, 'td_error': 1.6687896156572728, 'init_value': -11.442797660827637, 'ave_value': 1.712398833400897} step=11628
2022-04-20 17:56.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.42 [info     ] TD3PlusBC_20220420175439: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003306377700894897, 'time_algorithm_update': 0.008984888506214522, 'critic_loss': 5.158041141012259, 'actor_loss': 0.08523008429951835, 'time_step': 0.009395623067666215, 'td_error': 1.7183075679600022, 'init_value': -11.798118591308594, 'ave_value': 1.7207420846295485} step=11970
2022-04-20 17:56.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.46 [info     ] TD3PlusBC_20220420175439: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00032563934549253586, 'time_algorithm_update': 0.008812210016083299, 'critic_loss': 5.288664318490446, 'actor_loss': 0.08699611001457387, 'time_step': 0.00921885381665146, 'td_error': 1.7233914255261946, 'init_value': -12.331530570983887, 'ave_value': 1.8209342873247492} step=12312
2022-04-20 17:56.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.49 [info     ] TD3PlusBC_20220420175439: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003237647619860911, 'time_algorithm_update': 0.008431310542145668, 'critic_loss': 5.564497900113725, 'actor_loss': 0.09759229979319879, 'time_step': 0.008835589676572565, 'td_error': 1.7647457510109372, 'init_value': -12.824175834655762, 'ave_value': 1.8647270514788359} step=12654
2022-04-20 17:56.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.53 [info     ] TD3PlusBC_20220420175439: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00032840904436613386, 'time_algorithm_update': 0.008932972512049981, 'critic_loss': 5.765978201788071, 'actor_loss': 0.09160584945514885, 'time_step': 0.009341178581728572, 'td_error': 1.860393013394591, 'init_value': -12.464505195617676, 'ave_value': 2.048021592009888} step=12996
2022-04-20 17:56.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.57 [info     ] TD3PlusBC_20220420175439: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00032572369826467415, 'time_algorithm_update': 0.00846857634204173, 'critic_loss': 6.003481524381024, 'actor_loss': 0.0919809548173383, 'time_step': 0.008872687468054698, 'td_error': 1.9178451910136018, 'init_value': -12.968213081359863, 'ave_value': 1.8773227769278225} step=13338
2022-04-20 17:56.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.00 [info     ] TD3PlusBC_20220420175439: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003286279432954844, 'time_algorithm_update': 0.009028902527881645, 'critic_loss': 6.308644904727824, 'actor_loss': 0.09152276968659713, 'time_step': 0.009439247393468667, 'td_error': 1.9536973817696393, 'init_value': -13.361673355102539, 'ave_value': 1.9826771796597085} step=13680
2022-04-20 17:57.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.04 [info     ] TD3PlusBC_20220420175439: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00032450023450349505, 'time_algorithm_update': 0.008822164340325964, 'critic_loss': 6.512666753683871, 'actor_loss': 0.10926479936648183, 'time_step': 0.009226023802283214, 'td_error': 2.0054576089530625, 'init_value': -13.887341499328613, 'ave_value': 1.9879839172562468} step=14022
2022-04-20 17:57.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.08 [info     ] TD3PlusBC_20220420175439: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00032756412238405463, 'time_algorithm_update': 0.008474350672716286, 'critic_loss': 6.787044253788497, 'actor_loss': 0.10527257096262006, 'time_step': 0.008881584245559068, 'td_error': 2.0487545507218896, 'init_value': -14.536539077758789, 'ave_value': 2.0229588334502333} step=14364
2022-04-20 17:57.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.11 [info     ] TD3PlusBC_20220420175439: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00032569023600795814, 'time_algorithm_update': 0.008951255452563192, 'critic_loss': 7.096884799282453, 'actor_loss': 0.09569704536980356, 'time_step': 0.009357886704785085, 'td_error': 2.0980496414676617, 'init_value': -14.569406509399414, 'ave_value': 2.115243541633266} step=14706
2022-04-20 17:57.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.15 [info     ] TD3PlusBC_20220420175439: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032924560078403405, 'time_algorithm_update': 0.008643388748168945, 'critic_loss': 7.338878134886424, 'actor_loss': 0.08568541794928194, 'time_step': 0.009053957392597756, 'td_error': 2.1726658133230567, 'init_value': -14.811497688293457, 'ave_value': 2.003648205991784} step=15048
2022-04-20 17:57.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.19 [info     ] TD3PlusBC_20220420175439: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032873599849946315, 'time_algorithm_update': 0.008894612217507166, 'critic_loss': 7.663423256044499, 'actor_loss': 0.1178987775629724, 'time_step': 0.009304204182318079, 'td_error': 2.2343143563826295, 'init_value': -15.334579467773438, 'ave_value': 2.0792372481171295} step=15390
2022-04-20 17:57.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.22 [info     ] TD3PlusBC_20220420175439: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00032693809933132597, 'time_algorithm_update': 0.00877415436750267, 'critic_loss': 7.849818572663424, 'actor_loss': 0.11211614731198166, 'time_step': 0.009179041399593241, 'td_error': 2.2901800170293196, 'init_value': -15.78722858428955, 'ave_value': 2.0754437305363367} step=15732
2022-04-20 17:57.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.26 [info     ] TD3PlusBC_20220420175439: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00032547273133930405, 'time_algorithm_update': 0.008377818336263734, 'critic_loss': 8.116710178161922, 'actor_loss': 0.08082361525872298, 'time_step': 0.00878419792442991, 'td_error': 2.3537969981868927, 'init_value': -16.433399200439453, 'ave_value': 2.1539000788803824} step=16074
2022-04-20 17:57.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.30 [info     ] TD3PlusBC_20220420175439: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003300417236417358, 'time_algorithm_update': 0.008953015009562174, 'critic_loss': 8.482241736866577, 'actor_loss': 0.09738234371731155, 'time_step': 0.009363651972765114, 'td_error': 2.433963163776301, 'init_value': -16.506759643554688, 'ave_value': 2.16654326200309} step=16416
2022-04-20 17:57.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.33 [info     ] TD3PlusBC_20220420175439: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032255105804978755, 'time_algorithm_update': 0.008917520617881017, 'critic_loss': 8.767567418123546, 'actor_loss': 0.10035833302471373, 'time_step': 0.00931872959025422, 'td_error': 2.4891487599470232, 'init_value': -16.760278701782227, 'ave_value': 2.229638572032102} step=16758
2022-04-20 17:57.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.37 [info     ] TD3PlusBC_20220420175439: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00033094938735515753, 'time_algorithm_update': 0.008878605407580994, 'critic_loss': 9.061305050264325, 'actor_loss': 0.12402122071263386, 'time_step': 0.00928884500648543, 'td_error': 2.558249495816487, 'init_value': -17.237516403198242, 'ave_value': 2.237700878760467} step=17100
2022-04-20 17:57.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420175439/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:57.38 [info     ] FQE_20220420175737: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016314868467399873, 'time_algorithm_update': 0.004991139274045646, 'loss': 0.00819165025787226, 'time_step': 0.005228836852383901, 'init_value': -0.34474632143974304, 'ave_value': -0.29370167579266937, 'soft_opc': nan} step=166




2022-04-20 17:57.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.39 [info     ] FQE_20220420175737: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001640808151428958, 'time_algorithm_update': 0.005040522081306182, 'loss': 0.0061537577638806525, 'time_step': 0.00527313387537577, 'init_value': -0.507605791091919, 'ave_value': -0.3890065094766518, 'soft_opc': nan} step=332




2022-04-20 17:57.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.40 [info     ] FQE_20220420175737: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016459643122661546, 'time_algorithm_update': 0.0050500947308827595, 'loss': 0.005633263165658587, 'time_step': 0.005291436091963067, 'init_value': -0.5666285753250122, 'ave_value': -0.41338393452266853, 'soft_opc': nan} step=498




2022-04-20 17:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.41 [info     ] FQE_20220420175737: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016907180648252187, 'time_algorithm_update': 0.005083286618611899, 'loss': 0.005696696751044098, 'time_step': 0.005328198513352728, 'init_value': -0.6392184495925903, 'ave_value': -0.4272737191788651, 'soft_opc': nan} step=664




2022-04-20 17:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.42 [info     ] FQE_20220420175737: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015759755329913404, 'time_algorithm_update': 0.00499424876936947, 'loss': 0.005315243457275701, 'time_step': 0.0052249747586537556, 'init_value': -0.7081618309020996, 'ave_value': -0.4738313741948422, 'soft_opc': nan} step=830




2022-04-20 17:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.43 [info     ] FQE_20220420175737: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016109052910862198, 'time_algorithm_update': 0.004187074052282126, 'loss': 0.00514657004366079, 'time_step': 0.004423833755125482, 'init_value': -0.7620800733566284, 'ave_value': -0.4941680591133935, 'soft_opc': nan} step=996




2022-04-20 17:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.44 [info     ] FQE_20220420175737: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001701561801404838, 'time_algorithm_update': 0.005065031798489122, 'loss': 0.005196529049258006, 'time_step': 0.005308471530316824, 'init_value': -0.83137047290802, 'ave_value': -0.5267539630870561, 'soft_opc': nan} step=1162




2022-04-20 17:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.45 [info     ] FQE_20220420175737: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001680563731365893, 'time_algorithm_update': 0.005149750824434212, 'loss': 0.004907745536955365, 'time_step': 0.0053896588015269085, 'init_value': -0.9142581224441528, 'ave_value': -0.5877271817510461, 'soft_opc': nan} step=1328




2022-04-20 17:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.46 [info     ] FQE_20220420175737: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016637738928737412, 'time_algorithm_update': 0.004984232316534203, 'loss': 0.004817842623690166, 'time_step': 0.00522444478000503, 'init_value': -0.9839482307434082, 'ave_value': -0.6375156587451226, 'soft_opc': nan} step=1494




2022-04-20 17:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.47 [info     ] FQE_20220420175737: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001643608851605151, 'time_algorithm_update': 0.005129591528191624, 'loss': 0.004762510809744428, 'time_step': 0.0053694506725633, 'init_value': -1.0210866928100586, 'ave_value': -0.6543356448814676, 'soft_opc': nan} step=1660




2022-04-20 17:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.48 [info     ] FQE_20220420175737: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001628657421433782, 'time_algorithm_update': 0.005009935562869152, 'loss': 0.0044930245178886295, 'time_step': 0.005251215164919934, 'init_value': -1.083066463470459, 'ave_value': -0.6946367533625784, 'soft_opc': nan} step=1826




2022-04-20 17:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.48 [info     ] FQE_20220420175737: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001621964466140931, 'time_algorithm_update': 0.005063226424067853, 'loss': 0.004371913807769586, 'time_step': 0.00529940013425896, 'init_value': -1.1380350589752197, 'ave_value': -0.7364924772256599, 'soft_opc': nan} step=1992




2022-04-20 17:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.49 [info     ] FQE_20220420175737: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001646510089736387, 'time_algorithm_update': 0.004973716046436724, 'loss': 0.004689160310130297, 'time_step': 0.005212404641760401, 'init_value': -1.245833158493042, 'ave_value': -0.8214930606385072, 'soft_opc': nan} step=2158




2022-04-20 17:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.50 [info     ] FQE_20220420175737: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016259285340826195, 'time_algorithm_update': 0.005080666886754783, 'loss': 0.004492299742042929, 'time_step': 0.005317214023636048, 'init_value': -1.3038173913955688, 'ave_value': -0.8668217239951765, 'soft_opc': nan} step=2324




2022-04-20 17:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.51 [info     ] FQE_20220420175737: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.000166677566896002, 'time_algorithm_update': 0.004129809069346233, 'loss': 0.00439795426020107, 'time_step': 0.004367828369140625, 'init_value': -1.3127110004425049, 'ave_value': -0.8613099144746524, 'soft_opc': nan} step=2490




2022-04-20 17:57.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.52 [info     ] FQE_20220420175737: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001647098954901638, 'time_algorithm_update': 0.005223445145480604, 'loss': 0.004686903517064919, 'time_step': 0.005463160664202219, 'init_value': -1.3983159065246582, 'ave_value': -0.9245679134091882, 'soft_opc': nan} step=2656




2022-04-20 17:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.53 [info     ] FQE_20220420175737: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016867970845785486, 'time_algorithm_update': 0.005099259227155203, 'loss': 0.004801087108081633, 'time_step': 0.005342950303870511, 'init_value': -1.4626569747924805, 'ave_value': -0.9897204497946543, 'soft_opc': nan} step=2822




2022-04-20 17:57.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.54 [info     ] FQE_20220420175737: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001654280237404697, 'time_algorithm_update': 0.005006326250283115, 'loss': 0.0049734784609985995, 'time_step': 0.005246452538363905, 'init_value': -1.4933756589889526, 'ave_value': -1.006997402351316, 'soft_opc': nan} step=2988




2022-04-20 17:57.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.55 [info     ] FQE_20220420175737: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001667278358735234, 'time_algorithm_update': 0.005148252808904073, 'loss': 0.005188000272028148, 'time_step': 0.005392684993973698, 'init_value': -1.572570562362671, 'ave_value': -1.0757277023986922, 'soft_opc': nan} step=3154




2022-04-20 17:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.56 [info     ] FQE_20220420175737: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016300936779343938, 'time_algorithm_update': 0.005048106951885913, 'loss': 0.005463263208568321, 'time_step': 0.005283506519823189, 'init_value': -1.6170728206634521, 'ave_value': -1.1116184796491924, 'soft_opc': nan} step=3320




2022-04-20 17:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.57 [info     ] FQE_20220420175737: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016409948647740376, 'time_algorithm_update': 0.005157335695013942, 'loss': 0.005607747341680105, 'time_step': 0.005396910460598497, 'init_value': -1.672309398651123, 'ave_value': -1.1737331319734587, 'soft_opc': nan} step=3486




2022-04-20 17:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.58 [info     ] FQE_20220420175737: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016566500606307065, 'time_algorithm_update': 0.0050815142780901434, 'loss': 0.005991746006668152, 'time_step': 0.005325764058584191, 'init_value': -1.7812559604644775, 'ave_value': -1.242792211610589, 'soft_opc': nan} step=3652




2022-04-20 17:57.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:57.59 [info     ] FQE_20220420175737: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001623185284166451, 'time_algorithm_update': 0.005073976803974933, 'loss': 0.006504174987420844, 'time_step': 0.0053112190890024944, 'init_value': -1.8935171365737915, 'ave_value': -1.3348385429546774, 'soft_opc': nan} step=3818




2022-04-20 17:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.00 [info     ] FQE_20220420175737: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016078029770448985, 'time_algorithm_update': 0.004018583929682353, 'loss': 0.006719301962812644, 'time_step': 0.004254812217620482, 'init_value': -1.913346290588379, 'ave_value': -1.3685861996970736, 'soft_opc': nan} step=3984




2022-04-20 17:58.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.01 [info     ] FQE_20220420175737: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016437237521252, 'time_algorithm_update': 0.004992072840771043, 'loss': 0.007184350601515272, 'time_step': 0.005231106137654868, 'init_value': -2.009091377258301, 'ave_value': -1.4066046665984768, 'soft_opc': nan} step=4150




2022-04-20 17:58.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.02 [info     ] FQE_20220420175737: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016246359032320688, 'time_algorithm_update': 0.005124059068151267, 'loss': 0.0075637199814257445, 'time_step': 0.005361048572034721, 'init_value': -2.1933038234710693, 'ave_value': -1.5739904804608307, 'soft_opc': nan} step=4316




2022-04-20 17:58.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.03 [info     ] FQE_20220420175737: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016466824405164603, 'time_algorithm_update': 0.005073563162102757, 'loss': 0.008159899570788425, 'time_step': 0.005310677620301764, 'init_value': -2.204843521118164, 'ave_value': -1.5697749224466246, 'soft_opc': nan} step=4482




2022-04-20 17:58.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.04 [info     ] FQE_20220420175737: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001676168786474021, 'time_algorithm_update': 0.0051865994212139084, 'loss': 0.008908653911623925, 'time_step': 0.005432584199560694, 'init_value': -2.274224281311035, 'ave_value': -1.6033623423796517, 'soft_opc': nan} step=4648




2022-04-20 17:58.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.05 [info     ] FQE_20220420175737: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016790556620402508, 'time_algorithm_update': 0.005087790718997817, 'loss': 0.009066550002197724, 'time_step': 0.005332332059561488, 'init_value': -2.400137424468994, 'ave_value': -1.6940450151828494, 'soft_opc': nan} step=4814




2022-04-20 17:58.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.06 [info     ] FQE_20220420175737: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001713583268314959, 'time_algorithm_update': 0.005117426435631442, 'loss': 0.009746543891273484, 'time_step': 0.005363449992903744, 'init_value': -2.4616408348083496, 'ave_value': -1.7548232790049132, 'soft_opc': nan} step=4980




2022-04-20 17:58.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.07 [info     ] FQE_20220420175737: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001678710960480104, 'time_algorithm_update': 0.005025109612798116, 'loss': 0.01012742705643177, 'time_step': 0.005265829074813659, 'init_value': -2.563119888305664, 'ave_value': -1.7979876197941669, 'soft_opc': nan} step=5146




2022-04-20 17:58.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.08 [info     ] FQE_20220420175737: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016308692564447242, 'time_algorithm_update': 0.005122704678271191, 'loss': 0.010668518258039895, 'time_step': 0.005359070846833378, 'init_value': -2.6656341552734375, 'ave_value': -1.8822731132073713, 'soft_opc': nan} step=5312




2022-04-20 17:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.08 [info     ] FQE_20220420175737: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00017172888100865376, 'time_algorithm_update': 0.004554788750338267, 'loss': 0.011063349203568468, 'time_step': 0.004797204431281032, 'init_value': -2.7462689876556396, 'ave_value': -1.9395980697427246, 'soft_opc': nan} step=5478




2022-04-20 17:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.09 [info     ] FQE_20220420175737: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016221368169210045, 'time_algorithm_update': 0.004756202180701566, 'loss': 0.012631241055429879, 'time_step': 0.004992545369159745, 'init_value': -2.85245943069458, 'ave_value': -2.0504787597134038, 'soft_opc': nan} step=5644




2022-04-20 17:58.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.10 [info     ] FQE_20220420175737: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001705310430871435, 'time_algorithm_update': 0.005144313157322895, 'loss': 0.01265012635218249, 'time_step': 0.0053940494376492785, 'init_value': -2.908722162246704, 'ave_value': -2.0681267266002323, 'soft_opc': nan} step=5810




2022-04-20 17:58.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.11 [info     ] FQE_20220420175737: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016479463462369987, 'time_algorithm_update': 0.005009115460407303, 'loss': 0.013147835729149989, 'time_step': 0.005249849284987852, 'init_value': -2.9352307319641113, 'ave_value': -2.0706328705859347, 'soft_opc': nan} step=5976




2022-04-20 17:58.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.12 [info     ] FQE_20220420175737: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00017305454575871848, 'time_algorithm_update': 0.005115431475352092, 'loss': 0.013845282294352662, 'time_step': 0.005367682640811047, 'init_value': -3.0516011714935303, 'ave_value': -2.1177460177994525, 'soft_opc': nan} step=6142




2022-04-20 17:58.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.13 [info     ] FQE_20220420175737: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001678222633269896, 'time_algorithm_update': 0.005197470446667039, 'loss': 0.01487485849346225, 'time_step': 0.005441312330314912, 'init_value': -3.094172477722168, 'ave_value': -2.1596534738333912, 'soft_opc': nan} step=6308




2022-04-20 17:58.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.14 [info     ] FQE_20220420175737: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00017121900995093655, 'time_algorithm_update': 0.005186985774212573, 'loss': 0.014509511635363865, 'time_step': 0.005429691578968462, 'init_value': -3.0983874797821045, 'ave_value': -2.1506113026458937, 'soft_opc': nan} step=6474




2022-04-20 17:58.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.15 [info     ] FQE_20220420175737: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001678581697395049, 'time_algorithm_update': 0.005004595561199878, 'loss': 0.01466122612915372, 'time_step': 0.005244312516177993, 'init_value': -3.238798141479492, 'ave_value': -2.277507919704055, 'soft_opc': nan} step=6640




2022-04-20 17:58.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.16 [info     ] FQE_20220420175737: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00017611664461802287, 'time_algorithm_update': 0.004999597388577749, 'loss': 0.01520235466150881, 'time_step': 0.005249543362353222, 'init_value': -3.1891655921936035, 'ave_value': -2.268132803800541, 'soft_opc': nan} step=6806




2022-04-20 17:58.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.17 [info     ] FQE_20220420175737: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016641473195639, 'time_algorithm_update': 0.00481370270970356, 'loss': 0.015741859938590282, 'time_step': 0.005052621106067336, 'init_value': -3.32627272605896, 'ave_value': -2.3333332035405276, 'soft_opc': nan} step=6972




2022-04-20 17:58.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.18 [info     ] FQE_20220420175737: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001635680715721774, 'time_algorithm_update': 0.0044969420835196255, 'loss': 0.016711726684420627, 'time_step': 0.004736781120300293, 'init_value': -3.4210622310638428, 'ave_value': -2.410229827812663, 'soft_opc': nan} step=7138




2022-04-20 17:58.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.19 [info     ] FQE_20220420175737: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016539929861045745, 'time_algorithm_update': 0.005233016358800681, 'loss': 0.01741643061760409, 'time_step': 0.005474919296172728, 'init_value': -3.5909841060638428, 'ave_value': -2.5493768623954542, 'soft_opc': nan} step=7304




2022-04-20 17:58.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.20 [info     ] FQE_20220420175737: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001703989074890872, 'time_algorithm_update': 0.00512056896485478, 'loss': 0.01846567797194044, 'time_step': 0.005368272942232798, 'init_value': -3.5976929664611816, 'ave_value': -2.5747513220519633, 'soft_opc': nan} step=7470




2022-04-20 17:58.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.21 [info     ] FQE_20220420175737: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00017346100634839162, 'time_algorithm_update': 0.005026038870754012, 'loss': 0.019138749596993833, 'time_step': 0.005274914833436529, 'init_value': -3.6933555603027344, 'ave_value': -2.585589286064108, 'soft_opc': nan} step=7636




2022-04-20 17:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.22 [info     ] FQE_20220420175737: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017397231366260942, 'time_algorithm_update': 0.005117993756949183, 'loss': 0.019844499510623156, 'time_step': 0.005368920693914574, 'init_value': -3.768949508666992, 'ave_value': -2.630194769068076, 'soft_opc': nan} step=7802




2022-04-20 17:58.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.23 [info     ] FQE_20220420175737: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00017565417002482587, 'time_algorithm_update': 0.00506929748029594, 'loss': 0.020855130852166427, 'time_step': 0.00532242907098977, 'init_value': -3.8779616355895996, 'ave_value': -2.7159567714240906, 'soft_opc': nan} step=7968




2022-04-20 17:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.24 [info     ] FQE_20220420175737: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016654112252844386, 'time_algorithm_update': 0.005155152585133013, 'loss': 0.021511870404812002, 'time_step': 0.005397800939628877, 'init_value': -3.883051872253418, 'ave_value': -2.691501767980354, 'soft_opc': nan} step=8134




2022-04-20 17:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:58.25 [info     ] FQE_20220420175737: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016920106956757694, 'time_algorithm_update': 0.0050009230533278135, 'loss': 0.0217942262569117, 'time_step': 0.005245508917843003, 'init_value': -3.946986198425293, 'ave_value': -2.719418384379766, 'soft_opc': nan} step=8300




2022-04-20 17:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175737/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 17:58.25 [info     ] Directory is created at d3rlpy_logs/FQE_20220420175825
2022-04-20 17:58.25 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:58.25 [debug    ] Building models...
2022-04-20 17:58.25 [debug    ] Models have been built.
2022-04-20 17:58.25 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420175825/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:58.27 [info     ] FQE_20220420175825: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001631808835406636, 'time_algorithm_update': 0.004602094029271325, 'loss': 0.030992467021353023, 'time_step': 0.004841078852498254, 'init_value': -0.5977668166160583, 'ave_value': -0.5808881819097175, 'soft_opc': nan} step=344




2022-04-20 17:58.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.29 [info     ] FQE_20220420175825: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017223247261934503, 'time_algorithm_update': 0.00507915643758552, 'loss': 0.025616978541514727, 'time_step': 0.005330819723217986, 'init_value': -1.3352503776550293, 'ave_value': -1.3216032268043172, 'soft_opc': nan} step=688




2022-04-20 17:58.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.31 [info     ] FQE_20220420175825: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016738786253818247, 'time_algorithm_update': 0.005078661580418431, 'loss': 0.02976925448287105, 'time_step': 0.0053241474683894665, 'init_value': -2.291689872741699, 'ave_value': -2.287633888316047, 'soft_opc': nan} step=1032




2022-04-20 17:58.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.33 [info     ] FQE_20220420175825: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001710355281829834, 'time_algorithm_update': 0.005053551391113636, 'loss': 0.03373831404957834, 'time_step': 0.005300984133121579, 'init_value': -2.7793943881988525, 'ave_value': -2.7807691028148676, 'soft_opc': nan} step=1376




2022-04-20 17:58.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.35 [info     ] FQE_20220420175825: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016887174096218374, 'time_algorithm_update': 0.004808852145838183, 'loss': 0.04457103341363024, 'time_step': 0.005052856927694276, 'init_value': -3.50644588470459, 'ave_value': -3.49433051349075, 'soft_opc': nan} step=1720




2022-04-20 17:58.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.37 [info     ] FQE_20220420175825: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017231841420018397, 'time_algorithm_update': 0.004986070616300716, 'loss': 0.05810651869166556, 'time_step': 0.005234593568846237, 'init_value': -4.138350963592529, 'ave_value': -4.1436465912372675, 'soft_opc': nan} step=2064




2022-04-20 17:58.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.39 [info     ] FQE_20220420175825: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001720709856166396, 'time_algorithm_update': 0.0050589906614880225, 'loss': 0.07133195632364774, 'time_step': 0.005307940549628679, 'init_value': -4.90258264541626, 'ave_value': -4.862895582962009, 'soft_opc': nan} step=2408




2022-04-20 17:58.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.41 [info     ] FQE_20220420175825: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001717050408208093, 'time_algorithm_update': 0.005044586436693059, 'loss': 0.09071879910824951, 'time_step': 0.0052945329699405404, 'init_value': -5.347587585449219, 'ave_value': -5.234639300859055, 'soft_opc': nan} step=2752




2022-04-20 17:58.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.43 [info     ] FQE_20220420175825: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016984066297841627, 'time_algorithm_update': 0.005029159229855204, 'loss': 0.10700727964556495, 'time_step': 0.005275713842968608, 'init_value': -5.852715969085693, 'ave_value': -5.713907569618308, 'soft_opc': nan} step=3096




2022-04-20 17:58.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.44 [info     ] FQE_20220420175825: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016701983851055767, 'time_algorithm_update': 0.004578281280606292, 'loss': 0.12852997374504285, 'time_step': 0.004820273366085318, 'init_value': -6.525347709655762, 'ave_value': -6.319235154644897, 'soft_opc': nan} step=3440




2022-04-20 17:58.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.46 [info     ] FQE_20220420175825: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017175840777020122, 'time_algorithm_update': 0.005088716745376587, 'loss': 0.1517985653846936, 'time_step': 0.005338009706763334, 'init_value': -7.129596710205078, 'ave_value': -6.993238322121335, 'soft_opc': nan} step=3784




2022-04-20 17:58.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.48 [info     ] FQE_20220420175825: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017118592594945155, 'time_algorithm_update': 0.005061790000560672, 'loss': 0.1737235328669898, 'time_step': 0.005309910968292591, 'init_value': -7.5525031089782715, 'ave_value': -7.369674815618861, 'soft_opc': nan} step=4128




2022-04-20 17:58.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.50 [info     ] FQE_20220420175825: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016966531443041424, 'time_algorithm_update': 0.005002839620723281, 'loss': 0.19437897934708312, 'time_step': 0.005246316277703574, 'init_value': -8.118874549865723, 'ave_value': -7.958074638776078, 'soft_opc': nan} step=4472




2022-04-20 17:58.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.52 [info     ] FQE_20220420175825: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017055799794751545, 'time_algorithm_update': 0.005003114079320153, 'loss': 0.21209415881677943, 'time_step': 0.005249869684840358, 'init_value': -8.286041259765625, 'ave_value': -8.082424660361381, 'soft_opc': nan} step=4816




2022-04-20 17:58.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.54 [info     ] FQE_20220420175825: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016928828039834665, 'time_algorithm_update': 0.004821163970370626, 'loss': 0.23485904584941997, 'time_step': 0.0050642753756323525, 'init_value': -8.53889274597168, 'ave_value': -8.275308785359334, 'soft_opc': nan} step=5160




2022-04-20 17:58.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.56 [info     ] FQE_20220420175825: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017083453577618267, 'time_algorithm_update': 0.0050387729045956635, 'loss': 0.2573715715376704, 'time_step': 0.0052852720715278806, 'init_value': -9.180088996887207, 'ave_value': -8.953768450130818, 'soft_opc': nan} step=5504




2022-04-20 17:58.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:58.58 [info     ] FQE_20220420175825: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016952531282291857, 'time_algorithm_update': 0.005080495462861172, 'loss': 0.27743712137994725, 'time_step': 0.005327251068381376, 'init_value': -9.118906021118164, 'ave_value': -8.92919931873503, 'soft_opc': nan} step=5848




2022-04-20 17:58.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.00 [info     ] FQE_20220420175825: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017180623010147448, 'time_algorithm_update': 0.005110845316288083, 'loss': 0.29644380470867765, 'time_step': 0.005360622738682946, 'init_value': -9.541610717773438, 'ave_value': -9.267081444037592, 'soft_opc': nan} step=6192




2022-04-20 17:59.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.02 [info     ] FQE_20220420175825: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017098285431085632, 'time_algorithm_update': 0.004566129556922025, 'loss': 0.3123596161170754, 'time_step': 0.004813953887584598, 'init_value': -9.864706039428711, 'ave_value': -9.659834765507798, 'soft_opc': nan} step=6536




2022-04-20 17:59.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.04 [info     ] FQE_20220420175825: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017100711201512538, 'time_algorithm_update': 0.005126940649609233, 'loss': 0.32485949344456544, 'time_step': 0.005374818347221197, 'init_value': -10.466554641723633, 'ave_value': -10.30763202295303, 'soft_opc': nan} step=6880




2022-04-20 17:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.06 [info     ] FQE_20220420175825: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017170157543448515, 'time_algorithm_update': 0.005011007536289303, 'loss': 0.3430388404576238, 'time_step': 0.005262163489363914, 'init_value': -10.95386028289795, 'ave_value': -10.854681803894373, 'soft_opc': nan} step=7224




2022-04-20 17:59.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.08 [info     ] FQE_20220420175825: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017625509306441907, 'time_algorithm_update': 0.005060782959294873, 'loss': 0.35358246755894535, 'time_step': 0.005314250325047692, 'init_value': -11.126879692077637, 'ave_value': -11.044474704222132, 'soft_opc': nan} step=7568




2022-04-20 17:59.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.10 [info     ] FQE_20220420175825: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001728534698486328, 'time_algorithm_update': 0.0049536595510882, 'loss': 0.361633998980789, 'time_step': 0.005204603422519772, 'init_value': -11.621957778930664, 'ave_value': -11.500124400652744, 'soft_opc': nan} step=7912




2022-04-20 17:59.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.12 [info     ] FQE_20220420175825: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001697242259979248, 'time_algorithm_update': 0.004682378020397452, 'loss': 0.3742084462780419, 'time_step': 0.004930615425109863, 'init_value': -11.956991195678711, 'ave_value': -11.76521582567628, 'soft_opc': nan} step=8256




2022-04-20 17:59.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.14 [info     ] FQE_20220420175825: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001754691434461017, 'time_algorithm_update': 0.005088202482046083, 'loss': 0.37661575456691343, 'time_step': 0.0053395102190416915, 'init_value': -11.936391830444336, 'ave_value': -11.723096954955107, 'soft_opc': nan} step=8600




2022-04-20 17:59.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.16 [info     ] FQE_20220420175825: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017038611478583756, 'time_algorithm_update': 0.005073892515759135, 'loss': 0.3909093758502845, 'time_step': 0.0053205829720164455, 'init_value': -12.402193069458008, 'ave_value': -12.279901209074652, 'soft_opc': nan} step=8944




2022-04-20 17:59.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.18 [info     ] FQE_20220420175825: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016900342564250148, 'time_algorithm_update': 0.005108535289764404, 'loss': 0.3943162585370416, 'time_step': 0.005354708017304886, 'init_value': -12.799628257751465, 'ave_value': -12.675059458948589, 'soft_opc': nan} step=9288




2022-04-20 17:59.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.19 [info     ] FQE_20220420175825: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016895352407943372, 'time_algorithm_update': 0.004548491433609364, 'loss': 0.40127508279455953, 'time_step': 0.004795923482540042, 'init_value': -13.000473976135254, 'ave_value': -12.873973686299244, 'soft_opc': nan} step=9632




2022-04-20 17:59.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.21 [info     ] FQE_20220420175825: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016962511594905409, 'time_algorithm_update': 0.0051070022028546, 'loss': 0.40636449331027824, 'time_step': 0.005353576222131419, 'init_value': -13.178244590759277, 'ave_value': -13.132011510343668, 'soft_opc': nan} step=9976




2022-04-20 17:59.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.23 [info     ] FQE_20220420175825: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001738265503284543, 'time_algorithm_update': 0.005080548829810564, 'loss': 0.41274902960935306, 'time_step': 0.005332684794137644, 'init_value': -13.642319679260254, 'ave_value': -13.791507175517781, 'soft_opc': nan} step=10320




2022-04-20 17:59.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.25 [info     ] FQE_20220420175825: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001708539419395979, 'time_algorithm_update': 0.005036679811255876, 'loss': 0.42287430876519444, 'time_step': 0.005282329265461411, 'init_value': -13.740413665771484, 'ave_value': -14.00493445154699, 'soft_opc': nan} step=10664




2022-04-20 17:59.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.27 [info     ] FQE_20220420175825: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016982264296953067, 'time_algorithm_update': 0.005101734815641891, 'loss': 0.42118763388676006, 'time_step': 0.005349021318346955, 'init_value': -13.690690040588379, 'ave_value': -14.220832352416993, 'soft_opc': nan} step=11008




2022-04-20 17:59.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.29 [info     ] FQE_20220420175825: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017102651817854062, 'time_algorithm_update': 0.004644306593163069, 'loss': 0.42057658720025143, 'time_step': 0.004889326040134873, 'init_value': -13.750778198242188, 'ave_value': -14.509251065569016, 'soft_opc': nan} step=11352




2022-04-20 17:59.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.31 [info     ] FQE_20220420175825: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017212781795235567, 'time_algorithm_update': 0.005204053812248762, 'loss': 0.4140550930616121, 'time_step': 0.005454319161038066, 'init_value': -13.364158630371094, 'ave_value': -14.297068039134045, 'soft_opc': nan} step=11696




2022-04-20 17:59.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.33 [info     ] FQE_20220420175825: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017255752585655036, 'time_algorithm_update': 0.005227515863817792, 'loss': 0.4119947933331998, 'time_step': 0.005479156970977783, 'init_value': -13.236959457397461, 'ave_value': -14.373332969397978, 'soft_opc': nan} step=12040




2022-04-20 17:59.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.35 [info     ] FQE_20220420175825: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017064671183741369, 'time_algorithm_update': 0.0051789942175843, 'loss': 0.4118457776710911, 'time_step': 0.0054261185402093935, 'init_value': -13.220218658447266, 'ave_value': -14.667073692032346, 'soft_opc': nan} step=12384




2022-04-20 17:59.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.37 [info     ] FQE_20220420175825: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017179652701976688, 'time_algorithm_update': 0.004831817954085594, 'loss': 0.409765946115692, 'time_step': 0.005079085743704507, 'init_value': -13.120461463928223, 'ave_value': -14.680883145401136, 'soft_opc': nan} step=12728




2022-04-20 17:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.39 [info     ] FQE_20220420175825: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001756576604621355, 'time_algorithm_update': 0.005487240331117497, 'loss': 0.41487928052151274, 'time_step': 0.005746282117311345, 'init_value': -13.121418952941895, 'ave_value': -15.008574694561194, 'soft_opc': nan} step=13072




2022-04-20 17:59.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.41 [info     ] FQE_20220420175825: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001767236132954442, 'time_algorithm_update': 0.0054518760636795395, 'loss': 0.4218913537374329, 'time_step': 0.0057084629702013594, 'init_value': -13.429303169250488, 'ave_value': -15.541961610805965, 'soft_opc': nan} step=13416




2022-04-20 17:59.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.43 [info     ] FQE_20220420175825: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017660925554674725, 'time_algorithm_update': 0.005059949880422548, 'loss': 0.4180978967504966, 'time_step': 0.005314657854479413, 'init_value': -13.251572608947754, 'ave_value': -15.517103624149216, 'soft_opc': nan} step=13760




2022-04-20 17:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.45 [info     ] FQE_20220420175825: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017583439516466717, 'time_algorithm_update': 0.005305658246195594, 'loss': 0.4206720034505219, 'time_step': 0.005558621744776881, 'init_value': -13.417167663574219, 'ave_value': -15.938483659182214, 'soft_opc': nan} step=14104




2022-04-20 17:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.47 [info     ] FQE_20220420175825: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017263168512388717, 'time_algorithm_update': 0.004906658516373745, 'loss': 0.42345030361591557, 'time_step': 0.005156499701876973, 'init_value': -13.263484001159668, 'ave_value': -15.879740271643476, 'soft_opc': nan} step=14448




2022-04-20 17:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.49 [info     ] FQE_20220420175825: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001706259195194688, 'time_algorithm_update': 0.005318609087966209, 'loss': 0.4212055192545576, 'time_step': 0.005567946406297906, 'init_value': -13.195419311523438, 'ave_value': -15.891329924838601, 'soft_opc': nan} step=14792




2022-04-20 17:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.52 [info     ] FQE_20220420175825: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017042076864907908, 'time_algorithm_update': 0.005382201006246167, 'loss': 0.42563307402265627, 'time_step': 0.005631877932437631, 'init_value': -13.289549827575684, 'ave_value': -16.157085065314178, 'soft_opc': nan} step=15136




2022-04-20 17:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.54 [info     ] FQE_20220420175825: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017238425654034282, 'time_algorithm_update': 0.005311058011165885, 'loss': 0.4302323317252706, 'time_step': 0.005559960076975268, 'init_value': -12.982536315917969, 'ave_value': -16.023048627396694, 'soft_opc': nan} step=15480




2022-04-20 17:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.55 [info     ] FQE_20220420175825: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017163850540338562, 'time_algorithm_update': 0.004743239907331245, 'loss': 0.4419589635190489, 'time_step': 0.004991384439690169, 'init_value': -12.990428924560547, 'ave_value': -16.239769132510414, 'soft_opc': nan} step=15824




2022-04-20 17:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:59.58 [info     ] FQE_20220420175825: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017505468324173327, 'time_algorithm_update': 0.005664478207743445, 'loss': 0.4470090720271908, 'time_step': 0.005919609652009121, 'init_value': -12.89937973022461, 'ave_value': -16.25234349620369, 'soft_opc': nan} step=16168




2022-04-20 17:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:00.00 [info     ] FQE_20220420175825: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001727543597997621, 'time_algorithm_update': 0.005306117756422176, 'loss': 0.4539510836651505, 'time_step': 0.005556261816690135, 'init_value': -12.994293212890625, 'ave_value': -16.33187097570154, 'soft_opc': nan} step=16512




2022-04-20 18:00.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:00.02 [info     ] FQE_20220420175825: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017361308253088662, 'time_algorithm_update': 0.005477723687194114, 'loss': 0.46617352843912707, 'time_step': 0.005730462628741597, 'init_value': -13.175893783569336, 'ave_value': -16.62928345208979, 'soft_opc': nan} step=16856




2022-04-20 18:00.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:00.04 [info     ] FQE_20220420175825: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001755315203999364, 'time_algorithm_update': 0.0054265184457912, 'loss': 0.4795963963504534, 'time_step': 0.0056805548279784445, 'init_value': -13.26073932647705, 'ave_value': -16.764065910567872, 'soft_opc': nan} step=17200




2022-04-20 18:00.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175825/model_17200.pt
search iteration:  30
using hyper params:  [0.006837866104683946, 0.0003198167097881431, 4.6615184533159966e-05, 1]
2022-04-20 18:00.04 [debug    ] RoundIterator is selected.
2022-04-20 18:00.04 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420180004
2022-04-20 18:00.04 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:00.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:00.04 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:00.04 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.006837866104

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.08 [info     ] TD3PlusBC_20220420180004: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00032875900380095546, 'time_algorithm_update': 0.008982930964196634, 'critic_loss': 1.9762936491105292, 'actor_loss': 0.5545810558206854, 'time_step': 0.009395229189019454, 'td_error': 0.7934011710479713, 'init_value': -0.48808765411376953, 'ave_value': 0.13365903696259232} step=342
2022-04-20 18:00.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.12 [info     ] TD3PlusBC_20220420180004: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033530645203172114, 'time_algorithm_update': 0.009711632254528023, 'critic_loss': 0.28469245270853155, 'actor_loss': 0.055124725645397145, 'time_step': 0.010128067250837359, 'td_error': 0.809808814026115, 'init_value': -0.7325807809829712, 'ave_value': 0.16894400065435466} step=684
2022-04-20 18:00.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.16 [info     ] TD3PlusBC_20220420180004: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033144713842380813, 'time_algorithm_update': 0.008759580160442152, 'critic_loss': 0.27052944375757587, 'actor_loss': 0.06526917465336142, 'time_step': 0.00917245212354158, 'td_error': 0.8096190610361532, 'init_value': -1.0068546533584595, 'ave_value': 0.22461070550551118} step=1026
2022-04-20 18:00.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.20 [info     ] TD3PlusBC_20220420180004: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00033337540096706815, 'time_algorithm_update': 0.009323246994910881, 'critic_loss': 0.279825198371508, 'actor_loss': 0.053837002674390004, 'time_step': 0.009739134046766493, 'td_error': 0.8094810167621205, 'init_value': -1.284947395324707, 'ave_value': 0.2813145920948486} step=1368
2022-04-20 18:00.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.23 [info     ] TD3PlusBC_20220420180004: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00033578956336305853, 'time_algorithm_update': 0.009510621689913566, 'critic_loss': 0.30812419587147166, 'actor_loss': 0.03872836768975732, 'time_step': 0.009928203465645774, 'td_error': 0.8112945320475712, 'init_value': -1.546419382095337, 'ave_value': 0.36431292425939676} step=1710
2022-04-20 18:00.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.27 [info     ] TD3PlusBC_20220420180004: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00033890294749834386, 'time_algorithm_update': 0.00936992056885658, 'critic_loss': 0.3485489038434642, 'actor_loss': 0.05205601207606974, 'time_step': 0.00979122223212705, 'td_error': 0.8183636886827731, 'init_value': -1.8758022785186768, 'ave_value': 0.4022335164998406} step=2052
2022-04-20 18:00.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.31 [info     ] TD3PlusBC_20220420180004: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003331990269889608, 'time_algorithm_update': 0.009282374242592973, 'critic_loss': 0.3998923814087583, 'actor_loss': 0.042687957289448956, 'time_step': 0.00969861752805654, 'td_error': 0.8263989495767133, 'init_value': -2.142066478729248, 'ave_value': 0.47607701017231857} step=2394
2022-04-20 18:00.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.35 [info     ] TD3PlusBC_20220420180004: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003303721634268064, 'time_algorithm_update': 0.008698520604629962, 'critic_loss': 0.4500514092631856, 'actor_loss': 0.0633135053018729, 'time_step': 0.009109984364425927, 'td_error': 0.8388871289509802, 'init_value': -2.425985097885132, 'ave_value': 0.5486202960946337} step=2736
2022-04-20 18:00.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.39 [info     ] TD3PlusBC_20220420180004: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00033093823326958553, 'time_algorithm_update': 0.009031988723933349, 'critic_loss': 0.504178949441129, 'actor_loss': 0.053922472358272786, 'time_step': 0.00944482373912432, 'td_error': 0.8529278802414146, 'init_value': -2.760878086090088, 'ave_value': 0.5900467411318358} step=3078
2022-04-20 18:00.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.42 [info     ] TD3PlusBC_20220420180004: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00032803259397807876, 'time_algorithm_update': 0.008987269206353796, 'critic_loss': 0.5608199638319992, 'actor_loss': 0.06130513692634147, 'time_step': 0.009395997426663225, 'td_error': 0.872245349418326, 'init_value': -3.0373425483703613, 'ave_value': 0.667657034282325} step=3420
2022-04-20 18:00.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.46 [info     ] TD3PlusBC_20220420180004: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032927697164970534, 'time_algorithm_update': 0.008498594077707033, 'critic_loss': 0.6132396613165998, 'actor_loss': 0.06760260835289955, 'time_step': 0.008907303475497062, 'td_error': 0.8916533541560004, 'init_value': -3.3882713317871094, 'ave_value': 0.6688311106407059} step=3762
2022-04-20 18:00.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.50 [info     ] TD3PlusBC_20220420180004: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003278429745233547, 'time_algorithm_update': 0.008956182769864623, 'critic_loss': 0.6740266645860951, 'actor_loss': 0.08189462977107505, 'time_step': 0.009366423763029756, 'td_error': 0.9195853362799706, 'init_value': -3.6624019145965576, 'ave_value': 0.7547660050799164} step=4104
2022-04-20 18:00.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.53 [info     ] TD3PlusBC_20220420180004: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003317747896874857, 'time_algorithm_update': 0.008835924299139725, 'critic_loss': 0.7400784836700786, 'actor_loss': 0.06804487623317897, 'time_step': 0.009249515003628202, 'td_error': 0.9473573216479273, 'init_value': -3.973878860473633, 'ave_value': 0.8185442482192606} step=4446
2022-04-20 18:00.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.57 [info     ] TD3PlusBC_20220420180004: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003301713898865103, 'time_algorithm_update': 0.00907619863922833, 'critic_loss': 0.7932224134037718, 'actor_loss': 0.07288320122929344, 'time_step': 0.009487935674120809, 'td_error': 0.9737349101262806, 'init_value': -4.294371604919434, 'ave_value': 0.8325361661123833} step=4788
2022-04-20 18:00.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.01 [info     ] TD3PlusBC_20220420180004: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00033371420631631775, 'time_algorithm_update': 0.009052328896104243, 'critic_loss': 0.8576302316628004, 'actor_loss': 0.07806454836363681, 'time_step': 0.009467870868437471, 'td_error': 1.0079363686182878, 'init_value': -4.5544586181640625, 'ave_value': 0.9366327893112257} step=5130
2022-04-20 18:01.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.04 [info     ] TD3PlusBC_20220420180004: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00033951572507445575, 'time_algorithm_update': 0.00859678558438842, 'critic_loss': 0.9125631282552641, 'actor_loss': 0.0629595880837817, 'time_step': 0.009016432260212145, 'td_error': 1.0425476496447732, 'init_value': -4.868735313415527, 'ave_value': 0.9479187675744194} step=5472
2022-04-20 18:01.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.08 [info     ] TD3PlusBC_20220420180004: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00032450441728558454, 'time_algorithm_update': 0.008924543509009288, 'critic_loss': 0.9526755732228185, 'actor_loss': 0.09031954146268074, 'time_step': 0.009329561601605332, 'td_error': 1.0791811485730767, 'init_value': -5.1706438064575195, 'ave_value': 1.0074225657496199} step=5814
2022-04-20 18:01.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.12 [info     ] TD3PlusBC_20220420180004: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003338613008197985, 'time_algorithm_update': 0.008885701497395834, 'critic_loss': 1.027795994033416, 'actor_loss': 0.07339737285473193, 'time_step': 0.009302261977167854, 'td_error': 1.1180631282092341, 'init_value': -5.432162284851074, 'ave_value': 1.0748441866068756} step=6156
2022-04-20 18:01.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.16 [info     ] TD3PlusBC_20220420180004: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003306531069571512, 'time_algorithm_update': 0.00886249960514537, 'critic_loss': 1.0886323788361243, 'actor_loss': 0.09275836451796063, 'time_step': 0.009274137647528397, 'td_error': 1.1681592159871679, 'init_value': -5.75469446182251, 'ave_value': 1.1198989569738524} step=6498
2022-04-20 18:01.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.19 [info     ] TD3PlusBC_20220420180004: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003279600924218607, 'time_algorithm_update': 0.008880466048480475, 'critic_loss': 1.1458223186956162, 'actor_loss': 0.06699524349287937, 'time_step': 0.00928935948868244, 'td_error': 1.2111850798920512, 'init_value': -6.062450408935547, 'ave_value': 1.16947983241489} step=6840
2022-04-20 18:01.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.23 [info     ] TD3PlusBC_20220420180004: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003385007032874035, 'time_algorithm_update': 0.008636795986465544, 'critic_loss': 1.1984805228155957, 'actor_loss': 0.08095134681428386, 'time_step': 0.00905637782916688, 'td_error': 1.2579202390637099, 'init_value': -6.338275909423828, 'ave_value': 1.2070088356763946} step=7182
2022-04-20 18:01.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.27 [info     ] TD3PlusBC_20220420180004: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003350025031998841, 'time_algorithm_update': 0.008945626822131418, 'critic_loss': 1.2427585361558093, 'actor_loss': 0.07277999855360093, 'time_step': 0.009364569396303412, 'td_error': 1.3103500136363837, 'init_value': -6.613104820251465, 'ave_value': 1.2556625060420818} step=7524
2022-04-20 18:01.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.30 [info     ] TD3PlusBC_20220420180004: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003292246868735866, 'time_algorithm_update': 0.008993718359205458, 'critic_loss': 1.2895228165632102, 'actor_loss': 0.09477578648175412, 'time_step': 0.009405387772454156, 'td_error': 1.3618078204437278, 'init_value': -6.858163356781006, 'ave_value': 1.3424549590965296} step=7866
2022-04-20 18:01.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.34 [info     ] TD3PlusBC_20220420180004: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003365926575242427, 'time_algorithm_update': 0.008633063550581011, 'critic_loss': 1.3584618514392808, 'actor_loss': 0.08941507058446868, 'time_step': 0.00905100365131222, 'td_error': 1.4205933259213086, 'init_value': -7.218687534332275, 'ave_value': 1.3315602896988459} step=8208
2022-04-20 18:01.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.38 [info     ] TD3PlusBC_20220420180004: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003311452809830158, 'time_algorithm_update': 0.008964416576407806, 'critic_loss': 1.4170376267175229, 'actor_loss': 0.08216773666310728, 'time_step': 0.009375521314074422, 'td_error': 1.4766647301505231, 'init_value': -7.475822448730469, 'ave_value': 1.3910191115074968} step=8550
2022-04-20 18:01.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.41 [info     ] TD3PlusBC_20220420180004: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003348756254765025, 'time_algorithm_update': 0.008641215095743102, 'critic_loss': 1.4769488788749043, 'actor_loss': 0.09983717634925368, 'time_step': 0.00905859330941362, 'td_error': 1.5337781798880419, 'init_value': -7.753195285797119, 'ave_value': 1.4332321575852867} step=8892
2022-04-20 18:01.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.45 [info     ] TD3PlusBC_20220420180004: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003332457347222936, 'time_algorithm_update': 0.00905374058505945, 'critic_loss': 1.521108629410727, 'actor_loss': 0.08383472928265381, 'time_step': 0.009468436241149902, 'td_error': 1.595800328970115, 'init_value': -8.052279472351074, 'ave_value': 1.4718641042202576} step=9234
2022-04-20 18:01.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.49 [info     ] TD3PlusBC_20220420180004: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003317008938705712, 'time_algorithm_update': 0.008953756756252713, 'critic_loss': 1.5788183440590462, 'actor_loss': 0.09968338394809885, 'time_step': 0.00936865736866555, 'td_error': 1.6508889070066382, 'init_value': -8.357858657836914, 'ave_value': 1.4695671202448775} step=9576
2022-04-20 18:01.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.52 [info     ] TD3PlusBC_20220420180004: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003325562728078742, 'time_algorithm_update': 0.008496520812051338, 'critic_loss': 1.6190102476822703, 'actor_loss': 0.08541884005331156, 'time_step': 0.008910271856519911, 'td_error': 1.7173263041167668, 'init_value': -8.597593307495117, 'ave_value': 1.5355764312976719} step=9918
2022-04-20 18:01.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.56 [info     ] TD3PlusBC_20220420180004: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00033694191982871607, 'time_algorithm_update': 0.008997419424224318, 'critic_loss': 1.6768346406650125, 'actor_loss': 0.07990099650900266, 'time_step': 0.009417232017070926, 'td_error': 1.7879225648581754, 'init_value': -8.83894157409668, 'ave_value': 1.6295173548405721} step=10260
2022-04-20 18:01.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.00 [info     ] TD3PlusBC_20220420180004: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003345458828217802, 'time_algorithm_update': 0.00882524565646523, 'critic_loss': 1.7339081110225782, 'actor_loss': 0.07895612980277218, 'time_step': 0.009243720456173546, 'td_error': 1.851796716640585, 'init_value': -9.148418426513672, 'ave_value': 1.6282380420749856} step=10602
2022-04-20 18:02.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.04 [info     ] TD3PlusBC_20220420180004: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003367369635063305, 'time_algorithm_update': 0.008882325295119257, 'critic_loss': 1.7847693023538729, 'actor_loss': 0.10059678284396902, 'time_step': 0.009300197077076338, 'td_error': 1.918173791955859, 'init_value': -9.37026309967041, 'ave_value': 1.6888473564085156} step=10944
2022-04-20 18:02.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.07 [info     ] TD3PlusBC_20220420180004: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00033166952300489995, 'time_algorithm_update': 0.008892736936870375, 'critic_loss': 1.8307072599118912, 'actor_loss': 0.0852235698560525, 'time_step': 0.009306826786688196, 'td_error': 1.991877932901091, 'init_value': -9.623007774353027, 'ave_value': 1.7572297038023805} step=11286
2022-04-20 18:02.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.11 [info     ] TD3PlusBC_20220420180004: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00033305611526756955, 'time_algorithm_update': 0.008469127075016847, 'critic_loss': 1.8888380695679035, 'actor_loss': 0.09665887571914851, 'time_step': 0.008884494067632665, 'td_error': 2.061061366824648, 'init_value': -9.909897804260254, 'ave_value': 1.76115211626154} step=11628
2022-04-20 18:02.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.15 [info     ] TD3PlusBC_20220420180004: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003340453432317366, 'time_algorithm_update': 0.008906736011393586, 'critic_loss': 1.9291206634881204, 'actor_loss': 0.1050076670291131, 'time_step': 0.009321191157513892, 'td_error': 2.145377323515856, 'init_value': -10.201070785522461, 'ave_value': 1.7987766606634616} step=11970
2022-04-20 18:02.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.18 [info     ] TD3PlusBC_20220420180004: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003342816704197934, 'time_algorithm_update': 0.00887387816668951, 'critic_loss': 1.9817182366349544, 'actor_loss': 0.07453107685722106, 'time_step': 0.009288245474385936, 'td_error': 2.2123946424617276, 'init_value': -10.420900344848633, 'ave_value': 1.8632203889520722} step=12312
2022-04-20 18:02.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.22 [info     ] TD3PlusBC_20220420180004: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003354012617590832, 'time_algorithm_update': 0.008781626907705564, 'critic_loss': 2.028441081008716, 'actor_loss': 0.09392083349109392, 'time_step': 0.009197242775855706, 'td_error': 2.283679643038969, 'init_value': -10.685739517211914, 'ave_value': 1.8860980675167296} step=12654
2022-04-20 18:02.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.26 [info     ] TD3PlusBC_20220420180004: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033015187023675927, 'time_algorithm_update': 0.008911636140611436, 'critic_loss': 2.07473340678459, 'actor_loss': 0.08298634837942513, 'time_step': 0.00932302461032979, 'td_error': 2.369013191705555, 'init_value': -10.93644905090332, 'ave_value': 1.9191468871856394} step=12996
2022-04-20 18:02.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.29 [info     ] TD3PlusBC_20220420180004: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003321310232954416, 'time_algorithm_update': 0.008453052643446894, 'critic_loss': 2.120637445683368, 'actor_loss': 0.09033230888216119, 'time_step': 0.00886472693660803, 'td_error': 2.448835084749196, 'init_value': -11.211564064025879, 'ave_value': 1.9661273679409914} step=13338
2022-04-20 18:02.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.33 [info     ] TD3PlusBC_20220420180004: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003365835948297155, 'time_algorithm_update': 0.009029055199427911, 'critic_loss': 2.1784357387158604, 'actor_loss': 0.07376855864510899, 'time_step': 0.009448958419219793, 'td_error': 2.514603055440235, 'init_value': -11.4287748336792, 'ave_value': 2.0104689374716265} step=13680
2022-04-20 18:02.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.37 [info     ] TD3PlusBC_20220420180004: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00033600706803171257, 'time_algorithm_update': 0.009012851101613184, 'critic_loss': 2.2218111272008096, 'actor_loss': 0.10983869834252966, 'time_step': 0.00942968067369963, 'td_error': 2.616975810716513, 'init_value': -11.667399406433105, 'ave_value': 2.0351669743110485} step=14022
2022-04-20 18:02.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.40 [info     ] TD3PlusBC_20220420180004: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033180476629246047, 'time_algorithm_update': 0.006917312131290547, 'critic_loss': 2.264025649872788, 'actor_loss': 0.08415735900750634, 'time_step': 0.007330051639623809, 'td_error': 2.69318671308121, 'init_value': -11.893290519714355, 'ave_value': 2.0900265172186} step=14364
2022-04-20 18:02.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.43 [info     ] TD3PlusBC_20220420180004: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00033204876191434806, 'time_algorithm_update': 0.007006885015476517, 'critic_loss': 2.3152268965516174, 'actor_loss': 0.11153841678772056, 'time_step': 0.00742103412137394, 'td_error': 2.7782896120997482, 'init_value': -12.147007942199707, 'ave_value': 2.147364779797861} step=14706
2022-04-20 18:02.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.46 [info     ] TD3PlusBC_20220420180004: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032842577549449184, 'time_algorithm_update': 0.007008999411822759, 'critic_loss': 2.3568978016400894, 'actor_loss': 0.09687012246651956, 'time_step': 0.007415575590747142, 'td_error': 2.8630060262826738, 'init_value': -12.400594711303711, 'ave_value': 2.153185684437788} step=15048
2022-04-20 18:02.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.48 [info     ] TD3PlusBC_20220420180004: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033188633054320575, 'time_algorithm_update': 0.006965462924444187, 'critic_loss': 2.3776749979421408, 'actor_loss': 0.09342111250636173, 'time_step': 0.007379550682870965, 'td_error': 2.942493947955879, 'init_value': -12.638440132141113, 'ave_value': 2.188660737200061} step=15390
2022-04-20 18:02.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.51 [info     ] TD3PlusBC_20220420180004: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00033166255170141744, 'time_algorithm_update': 0.007038427375213444, 'critic_loss': 2.4481703891740207, 'actor_loss': 0.0909424482120408, 'time_step': 0.007453462533783494, 'td_error': 3.034792742782758, 'init_value': -12.878191947937012, 'ave_value': 2.216853846927962} step=15732
2022-04-20 18:02.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.54 [info     ] TD3PlusBC_20220420180004: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003338306270844755, 'time_algorithm_update': 0.007036439856590583, 'critic_loss': 2.4919356087669295, 'actor_loss': 0.10825586096759428, 'time_step': 0.007451719010782521, 'td_error': 3.1142990756428586, 'init_value': -13.101840019226074, 'ave_value': 2.2518864295381196} step=16074
2022-04-20 18:02.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.57 [info     ] TD3PlusBC_20220420180004: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033097587830839103, 'time_algorithm_update': 0.006971510530215258, 'critic_loss': 2.5334351936801833, 'actor_loss': 0.10767228487465116, 'time_step': 0.007383493652120668, 'td_error': 3.1971577299423686, 'init_value': -13.245953559875488, 'ave_value': 2.339223818041401} step=16416
2022-04-20 18:02.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:03.00 [info     ] TD3PlusBC_20220420180004: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033029966187058833, 'time_algorithm_update': 0.006996664387440821, 'critic_loss': 2.5896213691659837, 'actor_loss': 0.09087731610787542, 'time_step': 0.007407095000060678, 'td_error': 3.287055158126084, 'init_value': -13.521924018859863, 'ave_value': 2.3323186679867827} step=16758
2022-04-20 18:03.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:03.03 [info     ] TD3PlusBC_20220420180004: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00034112679330926195, 'time_algorithm_update': 0.007129007612752636, 'critic_loss': 2.6277591783923713, 'actor_loss': 0.09226187834875625, 'time_step': 0.0075546893460011625, 'td_error': 3.380034886167641, 'init_value': -13.715337753295898, 'ave_value': 2.3942658649676956} step=17100
2022-04-20 18:03.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180004/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:03.04 [info     ] FQE_20220420180303: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016421652109609486, 'time_algorithm_update': 0.0037083464153742384, 'loss': 0.00856910270275706, 'time_step': 0.003945638904463773, 'init_value': -0.08470303565263748, 'ave_value': -0.006916045199788816, 'soft_opc': nan} step=177




2022-04-20 18:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.05 [info     ] FQE_20220420180303: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015851737415723208, 'time_algorithm_update': 0.0036010594017761577, 'loss': 0.006503641464626469, 'time_step': 0.003834553357571532, 'init_value': -0.26791104674339294, 'ave_value': -0.12579157441524458, 'soft_opc': nan} step=354




2022-04-20 18:03.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.06 [info     ] FQE_20220420180303: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00016264996286165917, 'time_algorithm_update': 0.0035950073414603195, 'loss': 0.005855338975503031, 'time_step': 0.0038304504028147898, 'init_value': -0.39942774176597595, 'ave_value': -0.20111616151994682, 'soft_opc': nan} step=531




2022-04-20 18:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.06 [info     ] FQE_20220420180303: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016450612558483404, 'time_algorithm_update': 0.0036615880869202695, 'loss': 0.005442511773073656, 'time_step': 0.003895340666259076, 'init_value': -0.49837246537208557, 'ave_value': -0.26779665648937223, 'soft_opc': nan} step=708




2022-04-20 18:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.07 [info     ] FQE_20220420180303: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.0001688299879515912, 'time_algorithm_update': 0.0037037046615686796, 'loss': 0.005301305474345125, 'time_step': 0.00395291673261567, 'init_value': -0.5147565603256226, 'ave_value': -0.2703505679086835, 'soft_opc': nan} step=885




2022-04-20 18:03.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.08 [info     ] FQE_20220420180303: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001628304605430129, 'time_algorithm_update': 0.003648337671312235, 'loss': 0.005034443668463388, 'time_step': 0.0038802920088256147, 'init_value': -0.5708940625190735, 'ave_value': -0.29633290336296725, 'soft_opc': nan} step=1062




2022-04-20 18:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.09 [info     ] FQE_20220420180303: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00016182425331934697, 'time_algorithm_update': 0.0036224645409880384, 'loss': 0.00469016179963315, 'time_step': 0.003857289330433991, 'init_value': -0.582520067691803, 'ave_value': -0.2945845263157118, 'soft_opc': nan} step=1239




2022-04-20 18:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.09 [info     ] FQE_20220420180303: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016626799847446592, 'time_algorithm_update': 0.0035119676320566296, 'loss': 0.004292330129787663, 'time_step': 0.0037525777762892555, 'init_value': -0.6002057194709778, 'ave_value': -0.30091317359682496, 'soft_opc': nan} step=1416




2022-04-20 18:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.10 [info     ] FQE_20220420180303: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016374372493075786, 'time_algorithm_update': 0.0035860551952642235, 'loss': 0.004128922017733172, 'time_step': 0.003822857377219335, 'init_value': -0.6297325491905212, 'ave_value': -0.3232294196179053, 'soft_opc': nan} step=1593




2022-04-20 18:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.11 [info     ] FQE_20220420180303: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00016554061975856285, 'time_algorithm_update': 0.003487275818646964, 'loss': 0.0040636828832202035, 'time_step': 0.0037256189658816923, 'init_value': -0.705457866191864, 'ave_value': -0.3752623899775493, 'soft_opc': nan} step=1770




2022-04-20 18:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.12 [info     ] FQE_20220420180303: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.0001662450995148912, 'time_algorithm_update': 0.003579375434056514, 'loss': 0.004133392324655364, 'time_step': 0.0038214376417257017, 'init_value': -0.7445652484893799, 'ave_value': -0.39219126520795866, 'soft_opc': nan} step=1947




2022-04-20 18:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.12 [info     ] FQE_20220420180303: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016657511393229166, 'time_algorithm_update': 0.0035747673551915055, 'loss': 0.003906134477853354, 'time_step': 0.0038171757412495586, 'init_value': -0.7666783928871155, 'ave_value': -0.4188355578976619, 'soft_opc': nan} step=2124




2022-04-20 18:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.13 [info     ] FQE_20220420180303: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016843531764833267, 'time_algorithm_update': 0.0036253605858754304, 'loss': 0.0038928981616535344, 'time_step': 0.0038695416208040915, 'init_value': -0.818065345287323, 'ave_value': -0.4525952837522234, 'soft_opc': nan} step=2301




2022-04-20 18:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.14 [info     ] FQE_20220420180303: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016530354817708334, 'time_algorithm_update': 0.0036013059023409914, 'loss': 0.004086694798542787, 'time_step': 0.003843323659088652, 'init_value': -0.8704202771186829, 'ave_value': -0.5006659283697068, 'soft_opc': nan} step=2478




2022-04-20 18:03.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.15 [info     ] FQE_20220420180303: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016422325608420507, 'time_algorithm_update': 0.0036265109218446548, 'loss': 0.004158686864252944, 'time_step': 0.003869076906624487, 'init_value': -0.9759454131126404, 'ave_value': -0.5718983179836481, 'soft_opc': nan} step=2655




2022-04-20 18:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.15 [info     ] FQE_20220420180303: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016430676993677172, 'time_algorithm_update': 0.0034907120095807953, 'loss': 0.004311336402381418, 'time_step': 0.003727056212344412, 'init_value': -1.078729271888733, 'ave_value': -0.6705067379506411, 'soft_opc': nan} step=2832




2022-04-20 18:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.16 [info     ] FQE_20220420180303: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00016275368167855645, 'time_algorithm_update': 0.003467834601968022, 'loss': 0.004516735551931427, 'time_step': 0.0037041437827934655, 'init_value': -1.1312768459320068, 'ave_value': -0.7329719092769621, 'soft_opc': nan} step=3009




2022-04-20 18:03.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.17 [info     ] FQE_20220420180303: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015906560218940347, 'time_algorithm_update': 0.0035067857322046312, 'loss': 0.004765380766521613, 'time_step': 0.0037435906081549866, 'init_value': -1.1486722230911255, 'ave_value': -0.7226902563796118, 'soft_opc': nan} step=3186




2022-04-20 18:03.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.17 [info     ] FQE_20220420180303: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00016438085480598407, 'time_algorithm_update': 0.002689353490279893, 'loss': 0.005259882999415704, 'time_step': 0.0029279108101365257, 'init_value': -1.2714260816574097, 'ave_value': -0.8219908351610015, 'soft_opc': nan} step=3363




2022-04-20 18:03.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.18 [info     ] FQE_20220420180303: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001666613217801024, 'time_algorithm_update': 0.0037055042503917283, 'loss': 0.0050803910760671995, 'time_step': 0.003947224320664917, 'init_value': -1.358774185180664, 'ave_value': -0.8353016524153339, 'soft_opc': nan} step=3540




2022-04-20 18:03.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.19 [info     ] FQE_20220420180303: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016132182320632504, 'time_algorithm_update': 0.003426545083859546, 'loss': 0.005725724450286246, 'time_step': 0.003660214149345786, 'init_value': -1.5167409181594849, 'ave_value': -0.9639378367780566, 'soft_opc': nan} step=3717




2022-04-20 18:03.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.20 [info     ] FQE_20220420180303: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016361306616141972, 'time_algorithm_update': 0.0036816785564530367, 'loss': 0.006288959217220965, 'time_step': 0.003917770870661331, 'init_value': -1.5112348794937134, 'ave_value': -0.9224354960706768, 'soft_opc': nan} step=3894




2022-04-20 18:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.20 [info     ] FQE_20220420180303: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016081400510281494, 'time_algorithm_update': 0.003523918194959393, 'loss': 0.006434304336491067, 'time_step': 0.0037603957504875915, 'init_value': -1.6142911911010742, 'ave_value': -0.998791400045604, 'soft_opc': nan} step=4071




2022-04-20 18:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.21 [info     ] FQE_20220420180303: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016930682510979433, 'time_algorithm_update': 0.0036441889186363437, 'loss': 0.006946263181066698, 'time_step': 0.0038856180374231717, 'init_value': -1.8202146291732788, 'ave_value': -1.1254940552704102, 'soft_opc': nan} step=4248




2022-04-20 18:03.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.22 [info     ] FQE_20220420180303: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016212059279619638, 'time_algorithm_update': 0.0035383849494201314, 'loss': 0.007566968378963443, 'time_step': 0.003771886987201238, 'init_value': -1.9093574285507202, 'ave_value': -1.1634455466844342, 'soft_opc': nan} step=4425




2022-04-20 18:03.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.23 [info     ] FQE_20220420180303: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001693000901216841, 'time_algorithm_update': 0.0037042097856769454, 'loss': 0.00822208074676557, 'time_step': 0.003950959545070842, 'init_value': -1.982747197151184, 'ave_value': -1.2058470916221136, 'soft_opc': nan} step=4602




2022-04-20 18:03.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.23 [info     ] FQE_20220420180303: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00016412088426492983, 'time_algorithm_update': 0.0035066671964138914, 'loss': 0.008690101205039833, 'time_step': 0.003743792657798293, 'init_value': -2.086897373199463, 'ave_value': -1.2193147371525879, 'soft_opc': nan} step=4779




2022-04-20 18:03.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.24 [info     ] FQE_20220420180303: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016689839336158193, 'time_algorithm_update': 0.0036643049811239297, 'loss': 0.009170196495955484, 'time_step': 0.003907574098662468, 'init_value': -2.0779531002044678, 'ave_value': -1.2212214173978424, 'soft_opc': nan} step=4956




2022-04-20 18:03.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.25 [info     ] FQE_20220420180303: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.0001663366953531901, 'time_algorithm_update': 0.0035305077073264257, 'loss': 0.009686353271116771, 'time_step': 0.0037708040011131157, 'init_value': -2.282515287399292, 'ave_value': -1.321607565510649, 'soft_opc': nan} step=5133




2022-04-20 18:03.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.26 [info     ] FQE_20220420180303: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016594337204755362, 'time_algorithm_update': 0.0036647993292512194, 'loss': 0.010124135133678798, 'time_step': 0.00390688308888236, 'init_value': -2.3853578567504883, 'ave_value': -1.3342088224506146, 'soft_opc': nan} step=5310




2022-04-20 18:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.26 [info     ] FQE_20220420180303: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016420035712463033, 'time_algorithm_update': 0.003619268115630931, 'loss': 0.010717488407015295, 'time_step': 0.003855557091492044, 'init_value': -2.512611150741577, 'ave_value': -1.3880812017946271, 'soft_opc': nan} step=5487




2022-04-20 18:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.27 [info     ] FQE_20220420180303: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016048264368779243, 'time_algorithm_update': 0.0035477034789694233, 'loss': 0.011715836803912905, 'time_step': 0.0037809563221904516, 'init_value': -2.5997321605682373, 'ave_value': -1.4486287525041146, 'soft_opc': nan} step=5664




2022-04-20 18:03.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.28 [info     ] FQE_20220420180303: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016934184704796743, 'time_algorithm_update': 0.003599399900705801, 'loss': 0.012189054417708036, 'time_step': 0.0038462978298381225, 'init_value': -2.720033645629883, 'ave_value': -1.5163972000102024, 'soft_opc': nan} step=5841




2022-04-20 18:03.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.29 [info     ] FQE_20220420180303: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001678466796875, 'time_algorithm_update': 0.0037566659140721554, 'loss': 0.01183003991497628, 'time_step': 0.003999737022960253, 'init_value': -2.8325507640838623, 'ave_value': -1.5853546114386738, 'soft_opc': nan} step=6018




2022-04-20 18:03.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.29 [info     ] FQE_20220420180303: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00015696563289663886, 'time_algorithm_update': 0.003618261908407265, 'loss': 0.013050210738480932, 'time_step': 0.0038453522375074484, 'init_value': -2.895465135574341, 'ave_value': -1.604821747908363, 'soft_opc': nan} step=6195




2022-04-20 18:03.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.30 [info     ] FQE_20220420180303: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016252738607805327, 'time_algorithm_update': 0.003604568330581579, 'loss': 0.013293662411029224, 'time_step': 0.003837744394938151, 'init_value': -2.9930713176727295, 'ave_value': -1.6371145254498845, 'soft_opc': nan} step=6372




2022-04-20 18:03.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.31 [info     ] FQE_20220420180303: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016410606729108735, 'time_algorithm_update': 0.00370946172940529, 'loss': 0.014342166382114345, 'time_step': 0.00394765401290635, 'init_value': -3.1331729888916016, 'ave_value': -1.7247226060331762, 'soft_opc': nan} step=6549




2022-04-20 18:03.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.32 [info     ] FQE_20220420180303: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016155215979969434, 'time_algorithm_update': 0.0036218206761247022, 'loss': 0.015254293456621005, 'time_step': 0.0038581581439002086, 'init_value': -3.330993413925171, 'ave_value': -1.8278703162083039, 'soft_opc': nan} step=6726




2022-04-20 18:03.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.32 [info     ] FQE_20220420180303: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.0001670236641404319, 'time_algorithm_update': 0.0036595904894467803, 'loss': 0.01615790691947575, 'time_step': 0.003900417500296555, 'init_value': -3.383669853210449, 'ave_value': -1.8952696762732915, 'soft_opc': nan} step=6903




2022-04-20 18:03.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.33 [info     ] FQE_20220420180303: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016203977293887382, 'time_algorithm_update': 0.003545559058755131, 'loss': 0.016280983770687114, 'time_step': 0.0037807313735875704, 'init_value': -3.4289491176605225, 'ave_value': -1.8751039127956282, 'soft_opc': nan} step=7080




2022-04-20 18:03.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.34 [info     ] FQE_20220420180303: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016472164520436088, 'time_algorithm_update': 0.0037211806087170617, 'loss': 0.017037647945338092, 'time_step': 0.003961536170399122, 'init_value': -3.574597120285034, 'ave_value': -1.9569360179049116, 'soft_opc': nan} step=7257




2022-04-20 18:03.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.35 [info     ] FQE_20220420180303: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016347297840872726, 'time_algorithm_update': 0.0034958454175184, 'loss': 0.017711359618815703, 'time_step': 0.0037332146854723914, 'init_value': -3.640721559524536, 'ave_value': -1.96717453045888, 'soft_opc': nan} step=7434




2022-04-20 18:03.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.35 [info     ] FQE_20220420180303: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016424211405091367, 'time_algorithm_update': 0.0036209814966061695, 'loss': 0.018365616475410844, 'time_step': 0.00386218162579725, 'init_value': -3.859670400619507, 'ave_value': -2.0919317496252487, 'soft_opc': nan} step=7611




2022-04-20 18:03.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.36 [info     ] FQE_20220420180303: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016827906592417572, 'time_algorithm_update': 0.003622293472290039, 'loss': 0.019087307437949178, 'time_step': 0.003864917377967619, 'init_value': -3.938971519470215, 'ave_value': -2.101224239130278, 'soft_opc': nan} step=7788




2022-04-20 18:03.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.37 [info     ] FQE_20220420180303: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.0001657184234446725, 'time_algorithm_update': 0.0036170172826044977, 'loss': 0.020071553193311612, 'time_step': 0.0038551839731507383, 'init_value': -3.9291763305664062, 'ave_value': -2.1137542123551127, 'soft_opc': nan} step=7965




2022-04-20 18:03.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.37 [info     ] FQE_20220420180303: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.000166616870858575, 'time_algorithm_update': 0.003613997313935878, 'loss': 0.020927979533250134, 'time_step': 0.0038572475735077077, 'init_value': -4.221136569976807, 'ave_value': -2.302848308848905, 'soft_opc': nan} step=8142




2022-04-20 18:03.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.38 [info     ] FQE_20220420180303: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00016451286057294424, 'time_algorithm_update': 0.0036652397974736273, 'loss': 0.02144067009547883, 'time_step': 0.00390246493668206, 'init_value': -4.173120021820068, 'ave_value': -2.305231060029508, 'soft_opc': nan} step=8319




2022-04-20 18:03.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.39 [info     ] FQE_20220420180303: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001667650405969997, 'time_algorithm_update': 0.0034981285784877626, 'loss': 0.021948253631981163, 'time_step': 0.003740417081757454, 'init_value': -4.2442946434021, 'ave_value': -2.336021901143564, 'soft_opc': nan} step=8496




2022-04-20 18:03.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.40 [info     ] FQE_20220420180303: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00016643906717246534, 'time_algorithm_update': 0.003639446140009131, 'loss': 0.021839141055131277, 'time_step': 0.0038815123886711852, 'init_value': -4.32153844833374, 'ave_value': -2.3555708319933206, 'soft_opc': nan} step=8673




2022-04-20 18:03.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:03.40 [info     ] FQE_20220420180303: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016697247823079428, 'time_algorithm_update': 0.0035549678371450994, 'loss': 0.022995344494038002, 'time_step': 0.0037944101344394147, 'init_value': -4.425958156585693, 'ave_value': -2.436412293363262, 'soft_opc': nan} step=8850




2022-04-20 18:03.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180303/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:03.41 [debug    ] RoundIterator is selected.
2022-04-20 18:03.41 [info     ] Directory is created at d3rlpy_logs/FQE_20220420180341
2022-04-20 18:03.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:03.41 [debug    ] Building models...
2022-04-20 18:03.41 [debug    ] Models have been built.
2022-04-20 18:03.41 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420180341/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:03.43 [info     ] FQE_20220420180341: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016959392747213674, 'time_algorithm_update': 0.0036221056483512703, 'loss': 0.027569149501708357, 'time_step': 0.003865153983581898, 'init_value': -1.5696383714675903, 'ave_value': -1.5386056479570027, 'soft_opc': nan} step=344




2022-04-20 18:03.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.44 [info     ] FQE_20220420180341: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001685785692791606, 'time_algorithm_update': 0.0036073507264603017, 'loss': 0.02429709500405764, 'time_step': 0.0038476738818856173, 'init_value': -2.465975046157837, 'ave_value': -2.4019783945233972, 'soft_opc': nan} step=688




2022-04-20 18:03.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.46 [info     ] FQE_20220420180341: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016861946083778558, 'time_algorithm_update': 0.003575483726900677, 'loss': 0.02759610177396775, 'time_step': 0.003815419452134953, 'init_value': -3.6352832317352295, 'ave_value': -3.5422613454563123, 'soft_opc': nan} step=1032




2022-04-20 18:03.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.47 [info     ] FQE_20220420180341: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016979491987893747, 'time_algorithm_update': 0.0035599296869233596, 'loss': 0.033106794456279905, 'time_step': 0.0038042428881623026, 'init_value': -4.487561225891113, 'ave_value': -4.3697054721213675, 'soft_opc': nan} step=1376




2022-04-20 18:03.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.49 [info     ] FQE_20220420180341: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017197326172229855, 'time_algorithm_update': 0.003582449846489485, 'loss': 0.040262724660055406, 'time_step': 0.003829063371170399, 'init_value': -5.474830627441406, 'ave_value': -5.366086298415253, 'soft_opc': nan} step=1720




2022-04-20 18:03.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.50 [info     ] FQE_20220420180341: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016913718955461368, 'time_algorithm_update': 0.003599764302719471, 'loss': 0.049352845353031055, 'time_step': 0.0038439354231191237, 'init_value': -6.362174034118652, 'ave_value': -6.300680707676991, 'soft_opc': nan} step=2064




2022-04-20 18:03.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.51 [info     ] FQE_20220420180341: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016775380733401277, 'time_algorithm_update': 0.00355984790380611, 'loss': 0.06044857113535494, 'time_step': 0.0038030480229577354, 'init_value': -7.370092391967773, 'ave_value': -7.37220707527689, 'soft_opc': nan} step=2408




2022-04-20 18:03.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.53 [info     ] FQE_20220420180341: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017216247181559718, 'time_algorithm_update': 0.0036095172859901604, 'loss': 0.0713579228965479, 'time_step': 0.0038578565730605016, 'init_value': -8.18075942993164, 'ave_value': -8.267135626158199, 'soft_opc': nan} step=2752




2022-04-20 18:03.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.54 [info     ] FQE_20220420180341: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017061483028323152, 'time_algorithm_update': 0.003652758376542912, 'loss': 0.08372978895880975, 'time_step': 0.0038988437763480253, 'init_value': -8.906421661376953, 'ave_value': -9.055055257436392, 'soft_opc': nan} step=3096




2022-04-20 18:03.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.56 [info     ] FQE_20220420180341: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017139038374257642, 'time_algorithm_update': 0.003637255624283192, 'loss': 0.09859228351824852, 'time_step': 0.00388434044150419, 'init_value': -9.718842506408691, 'ave_value': -9.983355592150946, 'soft_opc': nan} step=3440




2022-04-20 18:03.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.57 [info     ] FQE_20220420180341: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001700873984846958, 'time_algorithm_update': 0.0036383714786795683, 'loss': 0.1104550646336446, 'time_step': 0.003880901392116103, 'init_value': -10.285943984985352, 'ave_value': -10.647068717914658, 'soft_opc': nan} step=3784




2022-04-20 18:03.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.59 [info     ] FQE_20220420180341: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017249168351639148, 'time_algorithm_update': 0.003607373598010041, 'loss': 0.12337727074749595, 'time_step': 0.0038551507994186046, 'init_value': -11.160837173461914, 'ave_value': -11.579545580347379, 'soft_opc': nan} step=4128




2022-04-20 18:03.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.00 [info     ] FQE_20220420180341: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001707874065221742, 'time_algorithm_update': 0.003654748894447504, 'loss': 0.1373665763747467, 'time_step': 0.0038972746494204498, 'init_value': -11.933730125427246, 'ave_value': -12.369361748056368, 'soft_opc': nan} step=4472




2022-04-20 18:04.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.02 [info     ] FQE_20220420180341: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001729449560475904, 'time_algorithm_update': 0.0036079668721487354, 'loss': 0.15562975144576888, 'time_step': 0.0038567996302316357, 'init_value': -12.816020965576172, 'ave_value': -13.384674972215214, 'soft_opc': nan} step=4816




2022-04-20 18:04.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.03 [info     ] FQE_20220420180341: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001719531624816185, 'time_algorithm_update': 0.0035769183968388757, 'loss': 0.17443657360516143, 'time_step': 0.0038248258967732273, 'init_value': -13.455890655517578, 'ave_value': -14.057664404151677, 'soft_opc': nan} step=5160




2022-04-20 18:04.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.05 [info     ] FQE_20220420180341: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017180623010147448, 'time_algorithm_update': 0.0036442002584767897, 'loss': 0.1893209015766462, 'time_step': 0.003891043884809627, 'init_value': -14.301074981689453, 'ave_value': -15.096429877533568, 'soft_opc': nan} step=5504




2022-04-20 18:04.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.06 [info     ] FQE_20220420180341: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001749763655108075, 'time_algorithm_update': 0.0036067720069441686, 'loss': 0.2071120212406873, 'time_step': 0.0038579134053962176, 'init_value': -15.035845756530762, 'ave_value': -15.909649368852108, 'soft_opc': nan} step=5848




2022-04-20 18:04.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.08 [info     ] FQE_20220420180341: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016805668209874354, 'time_algorithm_update': 0.0036758780479431152, 'loss': 0.22643992946439878, 'time_step': 0.003921275222024252, 'init_value': -15.400577545166016, 'ave_value': -16.406028445051597, 'soft_opc': nan} step=6192




2022-04-20 18:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.09 [info     ] FQE_20220420180341: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017349110093227652, 'time_algorithm_update': 0.003698678210724232, 'loss': 0.24297316078919656, 'time_step': 0.0039512251698693565, 'init_value': -16.18892478942871, 'ave_value': -17.293956128809903, 'soft_opc': nan} step=6536




2022-04-20 18:04.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.11 [info     ] FQE_20220420180341: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017535339954287508, 'time_algorithm_update': 0.0036775989587916886, 'loss': 0.25625951703486227, 'time_step': 0.003929547792257264, 'init_value': -16.625213623046875, 'ave_value': -17.939254699499756, 'soft_opc': nan} step=6880




2022-04-20 18:04.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.12 [info     ] FQE_20220420180341: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001718796962915465, 'time_algorithm_update': 0.003634863121564998, 'loss': 0.27588958682514036, 'time_step': 0.003883428351823674, 'init_value': -17.20614242553711, 'ave_value': -18.682422136777156, 'soft_opc': nan} step=7224




2022-04-20 18:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.13 [info     ] FQE_20220420180341: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017015116159306017, 'time_algorithm_update': 0.0035854993864547374, 'loss': 0.2899642552920552, 'time_step': 0.0038306921027427498, 'init_value': -17.464420318603516, 'ave_value': -19.07029919009488, 'soft_opc': nan} step=7568




2022-04-20 18:04.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.15 [info     ] FQE_20220420180341: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017293248065682344, 'time_algorithm_update': 0.0036016896713611693, 'loss': 0.2955110588403375, 'time_step': 0.0038518718508786932, 'init_value': -17.403762817382812, 'ave_value': -19.202781443203893, 'soft_opc': nan} step=7912




2022-04-20 18:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.16 [info     ] FQE_20220420180341: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001715393953545149, 'time_algorithm_update': 0.0036453791829042658, 'loss': 0.3073195143613618, 'time_step': 0.0038948405620663666, 'init_value': -17.88542938232422, 'ave_value': -19.83500493928656, 'soft_opc': nan} step=8256




2022-04-20 18:04.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.18 [info     ] FQE_20220420180341: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016856609388839368, 'time_algorithm_update': 0.0036116498847340427, 'loss': 0.3165072804143609, 'time_step': 0.0038586563842241154, 'init_value': -18.150793075561523, 'ave_value': -20.25253917498363, 'soft_opc': nan} step=8600




2022-04-20 18:04.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.19 [info     ] FQE_20220420180341: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016954402590906896, 'time_algorithm_update': 0.0036290232525315394, 'loss': 0.32726150096480755, 'time_step': 0.0038754149924877077, 'init_value': -17.939178466796875, 'ave_value': -20.198852002123992, 'soft_opc': nan} step=8944




2022-04-20 18:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.21 [info     ] FQE_20220420180341: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017146177070085392, 'time_algorithm_update': 0.003660974114440208, 'loss': 0.33637323428140303, 'time_step': 0.0039089938928914625, 'init_value': -18.594423294067383, 'ave_value': -21.004443502171082, 'soft_opc': nan} step=9288




2022-04-20 18:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.22 [info     ] FQE_20220420180341: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017370526180710902, 'time_algorithm_update': 0.0037479677865671556, 'loss': 0.34876684179572864, 'time_step': 0.003997842239779096, 'init_value': -18.916404724121094, 'ave_value': -21.65211282827564, 'soft_opc': nan} step=9632




2022-04-20 18:04.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.24 [info     ] FQE_20220420180341: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017194553863170535, 'time_algorithm_update': 0.0036177454992782237, 'loss': 0.3602361771613793, 'time_step': 0.003865413887556209, 'init_value': -19.246492385864258, 'ave_value': -22.026587162938743, 'soft_opc': nan} step=9976




2022-04-20 18:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.25 [info     ] FQE_20220420180341: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016953848129095032, 'time_algorithm_update': 0.0036647520786107974, 'loss': 0.36663432912576166, 'time_step': 0.003910112519596898, 'init_value': -19.153942108154297, 'ave_value': -22.194400714176734, 'soft_opc': nan} step=10320




2022-04-20 18:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.27 [info     ] FQE_20220420180341: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001720730648484341, 'time_algorithm_update': 0.003634007171142933, 'loss': 0.3773719939608993, 'time_step': 0.003882214773532956, 'init_value': -19.697154998779297, 'ave_value': -22.935243796389383, 'soft_opc': nan} step=10664




2022-04-20 18:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.28 [info     ] FQE_20220420180341: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017077215882234796, 'time_algorithm_update': 0.0036228978356649707, 'loss': 0.38636252301918383, 'time_step': 0.003871729900670606, 'init_value': -19.45449447631836, 'ave_value': -22.8783740727684, 'soft_opc': nan} step=11008




2022-04-20 18:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.30 [info     ] FQE_20220420180341: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017642974853515625, 'time_algorithm_update': 0.005145317593286204, 'loss': 0.39035002410758374, 'time_step': 0.005400705476139867, 'init_value': -19.241615295410156, 'ave_value': -22.83815515193719, 'soft_opc': nan} step=11352




2022-04-20 18:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.32 [info     ] FQE_20220420180341: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017768213915270428, 'time_algorithm_update': 0.005080017239548439, 'loss': 0.4000036238454456, 'time_step': 0.005336704642273659, 'init_value': -19.5234375, 'ave_value': -23.258286473108036, 'soft_opc': nan} step=11696




2022-04-20 18:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.34 [info     ] FQE_20220420180341: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017543656881465467, 'time_algorithm_update': 0.005117381034895431, 'loss': 0.4038724876333808, 'time_step': 0.00537148256634557, 'init_value': -19.568269729614258, 'ave_value': -23.57440777557256, 'soft_opc': nan} step=12040




2022-04-20 18:04.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.36 [info     ] FQE_20220420180341: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017639232236285542, 'time_algorithm_update': 0.005135161932124648, 'loss': 0.4113916782716419, 'time_step': 0.005389664755311123, 'init_value': -19.46978187561035, 'ave_value': -23.72695823774279, 'soft_opc': nan} step=12384




2022-04-20 18:04.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.38 [info     ] FQE_20220420180341: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001737274402795836, 'time_algorithm_update': 0.0046880002631697545, 'loss': 0.4192764067719149, 'time_step': 0.004941352578096612, 'init_value': -19.452838897705078, 'ave_value': -23.881029433914804, 'soft_opc': nan} step=12728




2022-04-20 18:04.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.40 [info     ] FQE_20220420180341: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017530488413433696, 'time_algorithm_update': 0.005130640989126161, 'loss': 0.4229464718021539, 'time_step': 0.005385849364968233, 'init_value': -19.735427856445312, 'ave_value': -24.21867623990318, 'soft_opc': nan} step=13072




2022-04-20 18:04.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.42 [info     ] FQE_20220420180341: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017543310342833053, 'time_algorithm_update': 0.005109278961669567, 'loss': 0.4256179287412399, 'time_step': 0.005363175342249316, 'init_value': -19.914505004882812, 'ave_value': -24.617853758752613, 'soft_opc': nan} step=13416




2022-04-20 18:04.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.44 [info     ] FQE_20220420180341: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017491329547970793, 'time_algorithm_update': 0.005097764175991679, 'loss': 0.43256095284596086, 'time_step': 0.0053520791752393855, 'init_value': -20.18061637878418, 'ave_value': -25.00556592556673, 'soft_opc': nan} step=13760




2022-04-20 18:04.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.46 [info     ] FQE_20220420180341: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017546637113704237, 'time_algorithm_update': 0.004875797865002654, 'loss': 0.4353053617199103, 'time_step': 0.005131210698637851, 'init_value': -20.073814392089844, 'ave_value': -24.9695619932938, 'soft_opc': nan} step=14104




2022-04-20 18:04.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.48 [info     ] FQE_20220420180341: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001760977645253026, 'time_algorithm_update': 0.004958642083545064, 'loss': 0.4466678270918512, 'time_step': 0.005212453908698503, 'init_value': -19.85340118408203, 'ave_value': -24.762269497827234, 'soft_opc': nan} step=14448




2022-04-20 18:04.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.50 [info     ] FQE_20220420180341: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.000177671049916467, 'time_algorithm_update': 0.005106957152832386, 'loss': 0.4471617136800358, 'time_step': 0.005361912555472795, 'init_value': -19.753238677978516, 'ave_value': -24.717467241606734, 'soft_opc': nan} step=14792




2022-04-20 18:04.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.52 [info     ] FQE_20220420180341: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001817456511564033, 'time_algorithm_update': 0.005113893470098806, 'loss': 0.45479358545894366, 'time_step': 0.005375133697376694, 'init_value': -20.42072296142578, 'ave_value': -25.495083412501188, 'soft_opc': nan} step=15136




2022-04-20 18:04.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.54 [info     ] FQE_20220420180341: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017684351566225984, 'time_algorithm_update': 0.00511801797290181, 'loss': 0.46597345706162063, 'time_step': 0.005373423875764359, 'init_value': -20.66988754272461, 'ave_value': -25.786408780978338, 'soft_opc': nan} step=15480




2022-04-20 18:04.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.56 [info     ] FQE_20220420180341: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017336911933366642, 'time_algorithm_update': 0.004633459933968478, 'loss': 0.4724551926984257, 'time_step': 0.004886468482571979, 'init_value': -20.917770385742188, 'ave_value': -26.144169076820752, 'soft_opc': nan} step=15824




2022-04-20 18:04.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.58 [info     ] FQE_20220420180341: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017812848091125488, 'time_algorithm_update': 0.005111643741297168, 'loss': 0.48470605020194724, 'time_step': 0.005369825418605361, 'init_value': -21.233379364013672, 'ave_value': -26.42758809226053, 'soft_opc': nan} step=16168




2022-04-20 18:04.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:05.00 [info     ] FQE_20220420180341: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017265663590542105, 'time_algorithm_update': 0.005199320094529973, 'loss': 0.499258833206367, 'time_step': 0.005449749702631041, 'init_value': -20.979022979736328, 'ave_value': -26.184974352102557, 'soft_opc': nan} step=16512




2022-04-20 18:05.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:05.02 [info     ] FQE_20220420180341: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017567082893016727, 'time_algorithm_update': 0.005130663167598636, 'loss': 0.4912573377233605, 'time_step': 0.005384625390518543, 'init_value': -21.15674591064453, 'ave_value': -26.412619348850335, 'soft_opc': nan} step=16856




2022-04-20 18:05.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:05.04 [info     ] FQE_20220420180341: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017570063125255496, 'time_algorithm_update': 0.005116789146911266, 'loss': 0.5037034040392745, 'time_step': 0.005371023056118987, 'init_value': -21.449459075927734, 'ave_value': -26.67031754871478, 'soft_opc': nan} step=17200




2022-04-20 18:05.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180341/model_17200.pt
search iteration:  31
using hyper params:  [0.00046175717242202265, 0.009979384005345926, 6.309015379909715e-05, 1]
2022-04-20 18:05.04 [debug    ] RoundIterator is selected.
2022-04-20 18:05.04 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420180504
2022-04-20 18:05.04 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:05.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:05.04 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:05.04 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.000461757172

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.08 [info     ] TD3PlusBC_20220420180504: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00032896186873229623, 'time_algorithm_update': 0.009002794299209327, 'critic_loss': 0.30664870441395636, 'actor_loss': 0.08510106771487241, 'time_step': 0.009415575635363485, 'td_error': 0.8103589426757832, 'init_value': -0.4913206994533539, 'ave_value': 0.19373027397036502} step=342
2022-04-20 18:05.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.11 [info     ] TD3PlusBC_20220420180504: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033061476478799743, 'time_algorithm_update': 0.008935033229359408, 'critic_loss': 0.17808941668934292, 'actor_loss': -0.005177740486916046, 'time_step': 0.009349118199264794, 'td_error': 0.8013788059169717, 'init_value': -0.7874923944473267, 'ave_value': 0.19007038243867977} step=684
2022-04-20 18:05.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.15 [info     ] TD3PlusBC_20220420180504: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003281190381412618, 'time_algorithm_update': 0.008535711388838919, 'critic_loss': 0.23454584928545338, 'actor_loss': -0.026677411637808148, 'time_step': 0.008944779111627946, 'td_error': 0.7993898946199044, 'init_value': -1.0270928144454956, 'ave_value': 0.2729784399026434} step=1026
2022-04-20 18:05.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.19 [info     ] TD3PlusBC_20220420180504: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003305659656636199, 'time_algorithm_update': 0.008942548294513547, 'critic_loss': 0.2810190086258434, 'actor_loss': -0.01188845745129892, 'time_step': 0.009353536611412003, 'td_error': 0.7997385763542976, 'init_value': -1.3137353658676147, 'ave_value': 0.31540148854515776} step=1368
2022-04-20 18:05.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.23 [info     ] TD3PlusBC_20220420180504: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003310658081233153, 'time_algorithm_update': 0.008975583210326078, 'critic_loss': 0.3583245642308952, 'actor_loss': -0.019811098469279663, 'time_step': 0.009389176006205598, 'td_error': 0.8009869585240107, 'init_value': -1.5739625692367554, 'ave_value': 0.38248987355689007} step=1710
2022-04-20 18:05.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.26 [info     ] TD3PlusBC_20220420180504: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00032843065540692957, 'time_algorithm_update': 0.008717474881668536, 'critic_loss': 0.42267040008602785, 'actor_loss': -0.004217972379853154, 'time_step': 0.009128880082515249, 'td_error': 0.805101698028876, 'init_value': -1.886935830116272, 'ave_value': 0.4435241445478446} step=2052
2022-04-20 18:05.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.30 [info     ] TD3PlusBC_20220420180504: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003340955366168106, 'time_algorithm_update': 0.00918544035905983, 'critic_loss': 0.532559592777874, 'actor_loss': 0.023923002945923665, 'time_step': 0.009600851270887587, 'td_error': 0.8111757062904881, 'init_value': -2.0965452194213867, 'ave_value': 0.5690438421389407} step=2394
2022-04-20 18:05.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.34 [info     ] TD3PlusBC_20220420180504: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00032997758764969677, 'time_algorithm_update': 0.008608848727934541, 'critic_loss': 0.5811578362570171, 'actor_loss': 0.021478558536510022, 'time_step': 0.009021063297115571, 'td_error': 0.8275543789932592, 'init_value': -2.4629828929901123, 'ave_value': 0.5721304734617992} step=2736
2022-04-20 18:05.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.37 [info     ] TD3PlusBC_20220420180504: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003317747896874857, 'time_algorithm_update': 0.00906523068745931, 'critic_loss': 0.7018705176418287, 'actor_loss': 0.026907291524765783, 'time_step': 0.00947794300770899, 'td_error': 0.8385035918737547, 'init_value': -2.7942378520965576, 'ave_value': 0.6562952866916637} step=3078
2022-04-20 18:05.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.41 [info     ] TD3PlusBC_20220420180504: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003345082377829747, 'time_algorithm_update': 0.008987838064717967, 'critic_loss': 0.7878450081623786, 'actor_loss': 0.039842517185978026, 'time_step': 0.009408312234265066, 'td_error': 0.857550608683643, 'init_value': -3.140998363494873, 'ave_value': 0.641666982554042} step=3420
2022-04-20 18:05.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.45 [info     ] TD3PlusBC_20220420180504: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00032466963717811987, 'time_algorithm_update': 0.008489586456477294, 'critic_loss': 0.8960961048540316, 'actor_loss': 0.050285739050796856, 'time_step': 0.00889508905466537, 'td_error': 0.8709610672713418, 'init_value': -3.435969829559326, 'ave_value': 0.7410072552306192} step=3762
2022-04-20 18:05.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.49 [info     ] TD3PlusBC_20220420180504: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00032582966207760815, 'time_algorithm_update': 0.008893115478649474, 'critic_loss': 1.0433385489802611, 'actor_loss': 0.027355285916934934, 'time_step': 0.009303229594091226, 'td_error': 0.893331374437384, 'init_value': -3.7706024646759033, 'ave_value': 0.7817533756737169} step=4104
2022-04-20 18:05.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.52 [info     ] TD3PlusBC_20220420180504: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003253660703960218, 'time_algorithm_update': 0.00838301126022785, 'critic_loss': 1.1676499147338477, 'actor_loss': 0.025934954680371703, 'time_step': 0.008789602078889546, 'td_error': 0.90734247533485, 'init_value': -4.065135955810547, 'ave_value': 0.8437484724837699} step=4446
2022-04-20 18:05.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.56 [info     ] TD3PlusBC_20220420180504: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003277600160119129, 'time_algorithm_update': 0.0088744400537502, 'critic_loss': 1.3100759151734804, 'actor_loss': 0.05190939859252924, 'time_step': 0.009284354789912352, 'td_error': 0.9298165275829019, 'init_value': -4.486732482910156, 'ave_value': 0.8746763177956077} step=4788
2022-04-20 18:05.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.59 [info     ] TD3PlusBC_20220420180504: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0002986590067545573, 'time_algorithm_update': 0.008563586842944051, 'critic_loss': 1.5259836770114843, 'actor_loss': 0.06657807568186208, 'time_step': 0.00893658225299322, 'td_error': 0.9565170202455294, 'init_value': -4.782480716705322, 'ave_value': 0.9295892275360136} step=5130
2022-04-20 18:05.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.03 [info     ] TD3PlusBC_20220420180504: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00028581298582735116, 'time_algorithm_update': 0.007955993825232077, 'critic_loss': 1.668189110898832, 'actor_loss': 0.06676383617154338, 'time_step': 0.008311234719572012, 'td_error': 0.9896789172258814, 'init_value': -5.023343563079834, 'ave_value': 1.0060913360045032} step=5472
2022-04-20 18:06.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.06 [info     ] TD3PlusBC_20220420180504: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00032173611267268307, 'time_algorithm_update': 0.008944511413574219, 'critic_loss': 1.8420690111598077, 'actor_loss': 0.06429719506648549, 'time_step': 0.009347326574269791, 'td_error': 1.007775397747516, 'init_value': -5.4995903968811035, 'ave_value': 1.0475760424023846} step=5814
2022-04-20 18:06.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.10 [info     ] TD3PlusBC_20220420180504: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003277628045333059, 'time_algorithm_update': 0.008518824800413254, 'critic_loss': 2.022025880757828, 'actor_loss': 0.07737581582184423, 'time_step': 0.008928050771791336, 'td_error': 1.0516126070494827, 'init_value': -5.610335826873779, 'ave_value': 1.1130482633774346} step=6156
2022-04-20 18:06.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.14 [info     ] TD3PlusBC_20220420180504: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003101993025394908, 'time_algorithm_update': 0.008840065253408332, 'critic_loss': 2.25843679852653, 'actor_loss': 0.06819423974344604, 'time_step': 0.009227529603835435, 'td_error': 1.0719531820595847, 'init_value': -6.059112548828125, 'ave_value': 1.11625905284549} step=6498
2022-04-20 18:06.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.17 [info     ] TD3PlusBC_20220420180504: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00028990932375366925, 'time_algorithm_update': 0.008443108776159454, 'critic_loss': 2.432943435266004, 'actor_loss': 0.06971656827375902, 'time_step': 0.008804566678945084, 'td_error': 1.111817696966752, 'init_value': -6.467411994934082, 'ave_value': 1.159248385318043} step=6840
2022-04-20 18:06.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.21 [info     ] TD3PlusBC_20220420180504: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003246724256995129, 'time_algorithm_update': 0.008485439925165901, 'critic_loss': 2.635938175763303, 'actor_loss': 0.07707066927649821, 'time_step': 0.008892266373885306, 'td_error': 1.1348761415754192, 'init_value': -6.857916355133057, 'ave_value': 1.1678901482928914} step=7182
2022-04-20 18:06.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.24 [info     ] TD3PlusBC_20220420180504: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003195840712876348, 'time_algorithm_update': 0.008922989605463038, 'critic_loss': 2.8628259702035557, 'actor_loss': 0.06404244071907467, 'time_step': 0.00932278898027208, 'td_error': 1.176592234906437, 'init_value': -7.1715593338012695, 'ave_value': 1.2390708533049402} step=7524
2022-04-20 18:06.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.28 [info     ] TD3PlusBC_20220420180504: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032072387940702384, 'time_algorithm_update': 0.008367934422186243, 'critic_loss': 3.1006199630380373, 'actor_loss': 0.07062416767690614, 'time_step': 0.008770120771307694, 'td_error': 1.193888258227346, 'init_value': -7.736727714538574, 'ave_value': 1.2296346588792313} step=7866
2022-04-20 18:06.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.32 [info     ] TD3PlusBC_20220420180504: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00032833514854921933, 'time_algorithm_update': 0.00889604482037282, 'critic_loss': 3.335531436909012, 'actor_loss': 0.07765675381871692, 'time_step': 0.009303785206978782, 'td_error': 1.2447939056815966, 'init_value': -8.044986724853516, 'ave_value': 1.2769900907436804} step=8208
2022-04-20 18:06.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.35 [info     ] TD3PlusBC_20220420180504: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00032405825386270446, 'time_algorithm_update': 0.008814306984170836, 'critic_loss': 3.633913538260767, 'actor_loss': 0.05928805407289176, 'time_step': 0.00921917310235096, 'td_error': 1.291970265951244, 'init_value': -8.238470077514648, 'ave_value': 1.2868485835205494} step=8550
2022-04-20 18:06.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.39 [info     ] TD3PlusBC_20220420180504: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003324537946466814, 'time_algorithm_update': 0.008577860586824472, 'critic_loss': 3.8574500519629806, 'actor_loss': 0.08580308819287702, 'time_step': 0.00899372533050894, 'td_error': 1.3136294520787881, 'init_value': -8.550210952758789, 'ave_value': 1.3785572690562857} step=8892
2022-04-20 18:06.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.43 [info     ] TD3PlusBC_20220420180504: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00032548388542487605, 'time_algorithm_update': 0.008893461952432555, 'critic_loss': 4.032968016744357, 'actor_loss': 0.07632215229565636, 'time_step': 0.009299635887145996, 'td_error': 1.3391193266519903, 'init_value': -8.981569290161133, 'ave_value': 1.428694836127127} step=9234
2022-04-20 18:06.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.46 [info     ] TD3PlusBC_20220420180504: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003237591849433051, 'time_algorithm_update': 0.008732711362559892, 'critic_loss': 4.254697878806912, 'actor_loss': 0.052728136990502564, 'time_step': 0.009138691495036521, 'td_error': 1.3900898836034015, 'init_value': -9.230571746826172, 'ave_value': 1.5395070176400445} step=9576
2022-04-20 18:06.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.50 [info     ] TD3PlusBC_20220420180504: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003212069907383612, 'time_algorithm_update': 0.008921273967675995, 'critic_loss': 4.631222977972867, 'actor_loss': 0.0867394662478514, 'time_step': 0.009323530726962619, 'td_error': 1.40872137218407, 'init_value': -9.89908218383789, 'ave_value': 1.4460729674679471} step=9918
2022-04-20 18:06.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.54 [info     ] TD3PlusBC_20220420180504: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00032517226815920825, 'time_algorithm_update': 0.00882528957567717, 'critic_loss': 4.752436654958111, 'actor_loss': 0.08981989058311919, 'time_step': 0.00923237256836473, 'td_error': 1.4365342641082888, 'init_value': -10.377537727355957, 'ave_value': 1.4158532539141775} step=10260
2022-04-20 18:06.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.57 [info     ] TD3PlusBC_20220420180504: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003268140101293374, 'time_algorithm_update': 0.008420894020482114, 'critic_loss': 5.020632135240655, 'actor_loss': 0.09372937801288582, 'time_step': 0.00882807391428808, 'td_error': 1.4823075826989591, 'init_value': -10.594181060791016, 'ave_value': 1.5034521006636725} step=10602
2022-04-20 18:06.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.01 [info     ] TD3PlusBC_20220420180504: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003266885266666524, 'time_algorithm_update': 0.00900910402599134, 'critic_loss': 5.22956854592987, 'actor_loss': 0.09946174618967793, 'time_step': 0.00941901109371966, 'td_error': 1.5532748100361746, 'init_value': -10.864995956420898, 'ave_value': 1.5485275729035808} step=10944
2022-04-20 18:07.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.05 [info     ] TD3PlusBC_20220420180504: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003298541955780565, 'time_algorithm_update': 0.008959181127492447, 'critic_loss': 5.581595710843628, 'actor_loss': 0.1049124927679349, 'time_step': 0.009369522507427728, 'td_error': 1.5657201007495336, 'init_value': -11.352757453918457, 'ave_value': 1.5499254504719597} step=11286
2022-04-20 18:07.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.08 [info     ] TD3PlusBC_20220420180504: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003273654402348033, 'time_algorithm_update': 0.008969733589573911, 'critic_loss': 5.777593928122381, 'actor_loss': 0.08228188177995514, 'time_step': 0.009376970648068434, 'td_error': 1.6423903620547964, 'init_value': -11.575380325317383, 'ave_value': 1.6043811454018158} step=11628
2022-04-20 18:07.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.12 [info     ] TD3PlusBC_20220420180504: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003298479214049222, 'time_algorithm_update': 0.009006396371718735, 'critic_loss': 6.107419131443515, 'actor_loss': 0.08147941793353237, 'time_step': 0.009416933645281875, 'td_error': 1.6787627006372707, 'init_value': -12.125322341918945, 'ave_value': 1.6516470344467005} step=11970
2022-04-20 18:07.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.16 [info     ] TD3PlusBC_20220420180504: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003259244718049702, 'time_algorithm_update': 0.008411207394293177, 'critic_loss': 6.296007450561078, 'actor_loss': 0.12135046156264885, 'time_step': 0.008816150196811609, 'td_error': 1.6934154620071602, 'init_value': -12.695449829101562, 'ave_value': 1.6579886353806266} step=12312
2022-04-20 18:07.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.19 [info     ] TD3PlusBC_20220420180504: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003282236076934993, 'time_algorithm_update': 0.008932204971536559, 'critic_loss': 6.5278473796203125, 'actor_loss': 0.092812290731055, 'time_step': 0.009340107789513661, 'td_error': 1.7785457051491536, 'init_value': -12.562898635864258, 'ave_value': 1.8177385439614657} step=12654
2022-04-20 18:07.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.23 [info     ] TD3PlusBC_20220420180504: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00032984583001387745, 'time_algorithm_update': 0.008941271151715552, 'critic_loss': 6.845241309955106, 'actor_loss': 0.10888947553017683, 'time_step': 0.009350276132773238, 'td_error': 1.7957417286713797, 'init_value': -13.199933052062988, 'ave_value': 1.7831165539805551} step=12996
2022-04-20 18:07.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.27 [info     ] TD3PlusBC_20220420180504: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.000316622661568268, 'time_algorithm_update': 0.008613451879624038, 'critic_loss': 6.941816884523247, 'actor_loss': 0.08790652558468935, 'time_step': 0.009009278308578401, 'td_error': 1.852089977116811, 'init_value': -13.402688980102539, 'ave_value': 1.80176904653392} step=13338
2022-04-20 18:07.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.30 [info     ] TD3PlusBC_20220420180504: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003246117753592151, 'time_algorithm_update': 0.00895001665193435, 'critic_loss': 7.40439285276926, 'actor_loss': 0.11585213289710514, 'time_step': 0.009356484078524406, 'td_error': 1.9176311632586969, 'init_value': -13.811912536621094, 'ave_value': 1.7713592257538626} step=13680
2022-04-20 18:07.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.34 [info     ] TD3PlusBC_20220420180504: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003251171948616965, 'time_algorithm_update': 0.008499434816907023, 'critic_loss': 7.567334686106409, 'actor_loss': 0.08767719455716903, 'time_step': 0.00890586599271897, 'td_error': 1.96195403645048, 'init_value': -14.800979614257812, 'ave_value': 1.8182558729580964} step=14022
2022-04-20 18:07.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.38 [info     ] TD3PlusBC_20220420180504: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00032821315073827554, 'time_algorithm_update': 0.00901435899455645, 'critic_loss': 8.021871948799891, 'actor_loss': 0.10535111086584671, 'time_step': 0.009425061488012124, 'td_error': 2.0269195594763043, 'init_value': -14.241800308227539, 'ave_value': 1.9681554369286105} step=14364
2022-04-20 18:07.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.41 [info     ] TD3PlusBC_20220420180504: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003227560143721731, 'time_algorithm_update': 0.008855275243346453, 'critic_loss': 8.24740488591947, 'actor_loss': 0.0875902923793472, 'time_step': 0.009259163984778331, 'td_error': 2.085288541686003, 'init_value': -14.767626762390137, 'ave_value': 1.9748941087919112} step=14706
2022-04-20 18:07.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.45 [info     ] TD3PlusBC_20220420180504: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00032543996621293633, 'time_algorithm_update': 0.008581685741045321, 'critic_loss': 8.588432951280248, 'actor_loss': 0.09209890665802342, 'time_step': 0.008988168504503038, 'td_error': 2.1030019141622844, 'init_value': -15.246831893920898, 'ave_value': 2.005790189959965} step=15048
2022-04-20 18:07.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.49 [info     ] TD3PlusBC_20220420180504: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032514508007562646, 'time_algorithm_update': 0.008855650299473813, 'critic_loss': 8.869099914679053, 'actor_loss': 0.0861949116775864, 'time_step': 0.009262072412591232, 'td_error': 2.183819977271655, 'init_value': -15.509908676147461, 'ave_value': 2.0114739566437296} step=15390
2022-04-20 18:07.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.52 [info     ] TD3PlusBC_20220420180504: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003216928905910916, 'time_algorithm_update': 0.00845600638473243, 'critic_loss': 9.179506905594764, 'actor_loss': 0.09199690082442691, 'time_step': 0.008860150973002115, 'td_error': 2.229381216506394, 'init_value': -15.951464653015137, 'ave_value': 2.0469742598705967} step=15732
2022-04-20 18:07.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.56 [info     ] TD3PlusBC_20220420180504: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00031944255382694, 'time_algorithm_update': 0.008771410462451957, 'critic_loss': 9.47422046340697, 'actor_loss': 0.10437984936680013, 'time_step': 0.009171251665081894, 'td_error': 2.2952989802144717, 'init_value': -16.40066909790039, 'ave_value': 2.043015658587704} step=16074
2022-04-20 18:07.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.00 [info     ] TD3PlusBC_20220420180504: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.000321276006642838, 'time_algorithm_update': 0.00882159199631005, 'critic_loss': 9.780226365864625, 'actor_loss': 0.09785597706050203, 'time_step': 0.0092241178479111, 'td_error': 2.3535197990382124, 'init_value': -16.691234588623047, 'ave_value': 2.134285871828025} step=16416
2022-04-20 18:08.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.03 [info     ] TD3PlusBC_20220420180504: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003213519938507972, 'time_algorithm_update': 0.008580568241097077, 'critic_loss': 10.05100990631427, 'actor_loss': 0.1162136335862659, 'time_step': 0.00898326349537275, 'td_error': 2.416678978620776, 'init_value': -17.3680477142334, 'ave_value': 2.136422205118479} step=16758
2022-04-20 18:08.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.07 [info     ] TD3PlusBC_20220420180504: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003233395124736585, 'time_algorithm_update': 0.008814738507856402, 'critic_loss': 10.449450347158644, 'actor_loss': 0.10462531277485061, 'time_step': 0.009220278053952936, 'td_error': 2.4778103935883102, 'init_value': -17.403268814086914, 'ave_value': 2.1201803447005783} step=17100
2022-04-20 18:08.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420180504/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-0

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:08.08 [info     ] FQE_20220420180807: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001678366258919957, 'time_algorithm_update': 0.004968535469239016, 'loss': 0.00623148775249944, 'time_step': 0.005216123109840485, 'init_value': -0.4458613395690918, 'ave_value': -0.4225674138614186, 'soft_opc': nan} step=166




2022-04-20 18:08.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.09 [info     ] FQE_20220420180807: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016776050429746328, 'time_algorithm_update': 0.005053415355912174, 'loss': 0.004725656724613474, 'time_step': 0.005299276616199907, 'init_value': -0.604622483253479, 'ave_value': -0.507273578076615, 'soft_opc': nan} step=332




2022-04-20 18:08.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.10 [info     ] FQE_20220420180807: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016283414450036474, 'time_algorithm_update': 0.004102276032229504, 'loss': 0.004198991831010544, 'time_step': 0.0043410206415567055, 'init_value': -0.6874459981918335, 'ave_value': -0.5502121196136819, 'soft_opc': nan} step=498




2022-04-20 18:08.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.11 [info     ] FQE_20220420180807: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016884631421192582, 'time_algorithm_update': 0.005138438868235393, 'loss': 0.0041137601077803464, 'time_step': 0.00537921578051096, 'init_value': -0.7804965972900391, 'ave_value': -0.5901134335880612, 'soft_opc': nan} step=664




2022-04-20 18:08.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.12 [info     ] FQE_20220420180807: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016976264585931617, 'time_algorithm_update': 0.005132666553359434, 'loss': 0.004073651121783418, 'time_step': 0.005373144724282874, 'init_value': -0.8567656874656677, 'ave_value': -0.6284250872293571, 'soft_opc': nan} step=830




2022-04-20 18:08.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.13 [info     ] FQE_20220420180807: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00017086712710828665, 'time_algorithm_update': 0.005087614059448242, 'loss': 0.0039041780793747627, 'time_step': 0.0053301388958850535, 'init_value': -0.934041440486908, 'ave_value': -0.6700516710745859, 'soft_opc': nan} step=996




2022-04-20 18:08.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.14 [info     ] FQE_20220420180807: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00017230194735239787, 'time_algorithm_update': 0.005062921937689723, 'loss': 0.0038787727840300873, 'time_step': 0.005309333284217191, 'init_value': -0.9992834329605103, 'ave_value': -0.6789939594094281, 'soft_opc': nan} step=1162




2022-04-20 18:08.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.15 [info     ] FQE_20220420180807: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016848437757377164, 'time_algorithm_update': 0.005007491054305111, 'loss': 0.0036987874275410033, 'time_step': 0.005248743367482381, 'init_value': -1.0831921100616455, 'ave_value': -0.740495488483895, 'soft_opc': nan} step=1328




2022-04-20 18:08.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.16 [info     ] FQE_20220420180807: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00017013607254947526, 'time_algorithm_update': 0.005022731172033103, 'loss': 0.003661064034137112, 'time_step': 0.005264017955366388, 'init_value': -1.1411744356155396, 'ave_value': -0.7630232194119746, 'soft_opc': nan} step=1494




2022-04-20 18:08.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.17 [info     ] FQE_20220420180807: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001714789723775473, 'time_algorithm_update': 0.005024545163993376, 'loss': 0.003683682636312961, 'time_step': 0.00527022114719253, 'init_value': -1.23158860206604, 'ave_value': -0.8066604369865344, 'soft_opc': nan} step=1660




2022-04-20 18:08.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.18 [info     ] FQE_20220420180807: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001703716186155756, 'time_algorithm_update': 0.005096629441502583, 'loss': 0.003712013687136168, 'time_step': 0.005337934896170375, 'init_value': -1.2997775077819824, 'ave_value': -0.8426149972420823, 'soft_opc': nan} step=1826




2022-04-20 18:08.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.18 [info     ] FQE_20220420180807: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016769012772893332, 'time_algorithm_update': 0.004285967493631753, 'loss': 0.0037343415448509425, 'time_step': 0.004524511027048869, 'init_value': -1.3620405197143555, 'ave_value': -0.8546790522632298, 'soft_opc': nan} step=1992




2022-04-20 18:08.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.19 [info     ] FQE_20220420180807: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016969945057328925, 'time_algorithm_update': 0.005138582493885454, 'loss': 0.004038426808378917, 'time_step': 0.005380945033337696, 'init_value': -1.5155973434448242, 'ave_value': -0.9694650906532466, 'soft_opc': nan} step=2158




2022-04-20 18:08.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.20 [info     ] FQE_20220420180807: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016924846603209713, 'time_algorithm_update': 0.005114562540169221, 'loss': 0.004080208158681551, 'time_step': 0.0053581243537994755, 'init_value': -1.6354413032531738, 'ave_value': -1.0684393437126198, 'soft_opc': nan} step=2324




2022-04-20 18:08.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.21 [info     ] FQE_20220420180807: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016873715871787933, 'time_algorithm_update': 0.005069140928337373, 'loss': 0.0041625679818442065, 'time_step': 0.005314544022801411, 'init_value': -1.7350707054138184, 'ave_value': -1.1144116297632725, 'soft_opc': nan} step=2490




2022-04-20 18:08.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.22 [info     ] FQE_20220420180807: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016903159130050475, 'time_algorithm_update': 0.005086562719689794, 'loss': 0.004472346985165636, 'time_step': 0.005333053060324795, 'init_value': -1.7940151691436768, 'ave_value': -1.1282149555178376, 'soft_opc': nan} step=2656




2022-04-20 18:08.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.23 [info     ] FQE_20220420180807: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016839102090123188, 'time_algorithm_update': 0.004996615720082478, 'loss': 0.004676132608437911, 'time_step': 0.005237809146743223, 'init_value': -1.925426959991455, 'ave_value': -1.2020927478399899, 'soft_opc': nan} step=2822




2022-04-20 18:08.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.24 [info     ] FQE_20220420180807: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016892099954995764, 'time_algorithm_update': 0.005054496857057135, 'loss': 0.005139761612082955, 'time_step': 0.0052958051842379285, 'init_value': -2.0240912437438965, 'ave_value': -1.264159777679959, 'soft_opc': nan} step=2988




2022-04-20 18:08.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.25 [info     ] FQE_20220420180807: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016728223088275954, 'time_algorithm_update': 0.005096352243997964, 'loss': 0.005536585023026375, 'time_step': 0.005339855171111693, 'init_value': -2.166032314300537, 'ave_value': -1.3618458169873233, 'soft_opc': nan} step=3154




2022-04-20 18:08.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.26 [info     ] FQE_20220420180807: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001720506024647908, 'time_algorithm_update': 0.005158395652311394, 'loss': 0.006068412948074381, 'time_step': 0.005404167864696089, 'init_value': -2.2853035926818848, 'ave_value': -1.412197093672312, 'soft_opc': nan} step=3320




2022-04-20 18:08.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.27 [info     ] FQE_20220420180807: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016645638339490775, 'time_algorithm_update': 0.004535369126193495, 'loss': 0.006273603697969432, 'time_step': 0.004774439765746335, 'init_value': -2.363243818283081, 'ave_value': -1.4622660334851292, 'soft_opc': nan} step=3486




2022-04-20 18:08.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.28 [info     ] FQE_20220420180807: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016980142478483268, 'time_algorithm_update': 0.0048606223370655476, 'loss': 0.007148300059818584, 'time_step': 0.005102566925876112, 'init_value': -2.4946775436401367, 'ave_value': -1.5323895571065378, 'soft_opc': nan} step=3652




2022-04-20 18:08.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.29 [info     ] FQE_20220420180807: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001699464866913945, 'time_algorithm_update': 0.005107353968792651, 'loss': 0.0073580753218792155, 'time_step': 0.005351121167102492, 'init_value': -2.659466505050659, 'ave_value': -1.6552379382421842, 'soft_opc': nan} step=3818




2022-04-20 18:08.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.30 [info     ] FQE_20220420180807: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016780071947948043, 'time_algorithm_update': 0.0051044182605054006, 'loss': 0.007795011470408504, 'time_step': 0.0053459764963173, 'init_value': -2.7006773948669434, 'ave_value': -1.6722436789322543, 'soft_opc': nan} step=3984




2022-04-20 18:08.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.31 [info     ] FQE_20220420180807: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016879030020840196, 'time_algorithm_update': 0.0051476467086608154, 'loss': 0.008551606820077154, 'time_step': 0.005388814282704549, 'init_value': -2.854323387145996, 'ave_value': -1.7707727887072005, 'soft_opc': nan} step=4150




2022-04-20 18:08.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.32 [info     ] FQE_20220420180807: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016980716981083513, 'time_algorithm_update': 0.005105505506676364, 'loss': 0.009423776047236949, 'time_step': 0.005349265523703702, 'init_value': -3.0531811714172363, 'ave_value': -1.8932936766730235, 'soft_opc': nan} step=4316




2022-04-20 18:08.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.33 [info     ] FQE_20220420180807: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016958024128373847, 'time_algorithm_update': 0.004940225417355457, 'loss': 0.009816454855321222, 'time_step': 0.005182689931019243, 'init_value': -3.1309337615966797, 'ave_value': -1.9124739336202274, 'soft_opc': nan} step=4482




2022-04-20 18:08.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.34 [info     ] FQE_20220420180807: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016917665320706656, 'time_algorithm_update': 0.005020177507975015, 'loss': 0.010683268009886953, 'time_step': 0.005264703049717179, 'init_value': -3.294748067855835, 'ave_value': -2.0138111002474757, 'soft_opc': nan} step=4648




2022-04-20 18:08.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.35 [info     ] FQE_20220420180807: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016971812190779722, 'time_algorithm_update': 0.004974175648516919, 'loss': 0.011499064589474708, 'time_step': 0.005219450916152403, 'init_value': -3.4398646354675293, 'ave_value': -2.1001674651159896, 'soft_opc': nan} step=4814




2022-04-20 18:08.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.36 [info     ] FQE_20220420180807: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017049944544413002, 'time_algorithm_update': 0.004817563367177205, 'loss': 0.012233751784611762, 'time_step': 0.00505826559411474, 'init_value': -3.558429002761841, 'ave_value': -2.170344933424447, 'soft_opc': nan} step=4980




2022-04-20 18:08.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.37 [info     ] FQE_20220420180807: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001641296478639166, 'time_algorithm_update': 0.004665124847228269, 'loss': 0.013291162117496582, 'time_step': 0.004899672715060682, 'init_value': -3.716487407684326, 'ave_value': -2.2908333656487163, 'soft_opc': nan} step=5146




2022-04-20 18:08.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.38 [info     ] FQE_20220420180807: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016911920294704208, 'time_algorithm_update': 0.00510040392358619, 'loss': 0.014206787805544236, 'time_step': 0.005344161068100527, 'init_value': -3.880821466445923, 'ave_value': -2.4103211675141307, 'soft_opc': nan} step=5312




2022-04-20 18:08.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.39 [info     ] FQE_20220420180807: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00017100357147584478, 'time_algorithm_update': 0.005076836390667651, 'loss': 0.01445442990607481, 'time_step': 0.005320806101144078, 'init_value': -3.9212396144866943, 'ave_value': -2.4186222056682047, 'soft_opc': nan} step=5478




2022-04-20 18:08.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.40 [info     ] FQE_20220420180807: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001713942332440112, 'time_algorithm_update': 0.005048816462597215, 'loss': 0.015270966338695038, 'time_step': 0.005295420267495765, 'init_value': -4.070959091186523, 'ave_value': -2.4904438288630666, 'soft_opc': nan} step=5644




2022-04-20 18:08.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.41 [info     ] FQE_20220420180807: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017070913889321936, 'time_algorithm_update': 0.005083082670188812, 'loss': 0.015764192929799973, 'time_step': 0.005326990621635713, 'init_value': -4.123473644256592, 'ave_value': -2.5108670808442004, 'soft_opc': nan} step=5810




2022-04-20 18:08.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.41 [info     ] FQE_20220420180807: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017355436302093138, 'time_algorithm_update': 0.005120202719447124, 'loss': 0.016622894698843718, 'time_step': 0.005371975611491376, 'init_value': -4.188260078430176, 'ave_value': -2.5428427884036355, 'soft_opc': nan} step=5976




2022-04-20 18:08.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.42 [info     ] FQE_20220420180807: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016906893396952065, 'time_algorithm_update': 0.005096240215990917, 'loss': 0.017202458439776337, 'time_step': 0.005341086042932717, 'init_value': -4.394203186035156, 'ave_value': -2.6755203402391423, 'soft_opc': nan} step=6142




2022-04-20 18:08.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.43 [info     ] FQE_20220420180807: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001683335706412074, 'time_algorithm_update': 0.005054723785584231, 'loss': 0.018298874792210894, 'time_step': 0.005296826362609863, 'init_value': -4.463330268859863, 'ave_value': -2.710392695880151, 'soft_opc': nan} step=6308




2022-04-20 18:08.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.44 [info     ] FQE_20220420180807: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00017173606229115682, 'time_algorithm_update': 0.004900521542652544, 'loss': 0.018818430944783502, 'time_step': 0.005146657127931893, 'init_value': -4.544337272644043, 'ave_value': -2.760338780794058, 'soft_opc': nan} step=6474




2022-04-20 18:08.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.45 [info     ] FQE_20220420180807: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016603986900973032, 'time_algorithm_update': 0.00441990846610931, 'loss': 0.018785970885143895, 'time_step': 0.004659929907465556, 'init_value': -4.640257835388184, 'ave_value': -2.7966232846180596, 'soft_opc': nan} step=6640




2022-04-20 18:08.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.46 [info     ] FQE_20220420180807: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001736290483589632, 'time_algorithm_update': 0.005098836967744023, 'loss': 0.01994375885093966, 'time_step': 0.0053509559976049215, 'init_value': -4.686821937561035, 'ave_value': -2.8138410096501443, 'soft_opc': nan} step=6806




2022-04-20 18:08.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.47 [info     ] FQE_20220420180807: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017070770263671875, 'time_algorithm_update': 0.005041752953127206, 'loss': 0.01965805518031345, 'time_step': 0.0052865657461694925, 'init_value': -4.736327171325684, 'ave_value': -2.8284266663161484, 'soft_opc': nan} step=6972




2022-04-20 18:08.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.48 [info     ] FQE_20220420180807: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016885205923792827, 'time_algorithm_update': 0.005064168608332255, 'loss': 0.020379904534875315, 'time_step': 0.005308421261339302, 'init_value': -4.905466079711914, 'ave_value': -2.958046751617043, 'soft_opc': nan} step=7138




2022-04-20 18:08.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.49 [info     ] FQE_20220420180807: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00017047933785312147, 'time_algorithm_update': 0.005081639232405697, 'loss': 0.02135551726012154, 'time_step': 0.0053245791469711855, 'init_value': -5.00645637512207, 'ave_value': -3.011965044117994, 'soft_opc': nan} step=7304




2022-04-20 18:08.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.50 [info     ] FQE_20220420180807: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016707540994667145, 'time_algorithm_update': 0.005119032170399126, 'loss': 0.022390176403246462, 'time_step': 0.005360765629504101, 'init_value': -5.043489456176758, 'ave_value': -3.026518115293872, 'soft_opc': nan} step=7470




2022-04-20 18:08.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.51 [info     ] FQE_20220420180807: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016788114984351467, 'time_algorithm_update': 0.005083424499235958, 'loss': 0.022908709520412362, 'time_step': 0.0053229877747685075, 'init_value': -5.200443267822266, 'ave_value': -3.1449086799546406, 'soft_opc': nan} step=7636




2022-04-20 18:08.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.52 [info     ] FQE_20220420180807: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017446207712931805, 'time_algorithm_update': 0.005003835781511054, 'loss': 0.02300304463077112, 'time_step': 0.005248323980584202, 'init_value': -5.318976402282715, 'ave_value': -3.197463885573922, 'soft_opc': nan} step=7802




2022-04-20 18:08.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.53 [info     ] FQE_20220420180807: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.000173022948115705, 'time_algorithm_update': 0.00508582304759198, 'loss': 0.0240013065238775, 'time_step': 0.00533580923654947, 'init_value': -5.4708380699157715, 'ave_value': -3.2984271190590686, 'soft_opc': nan} step=7968




2022-04-20 18:08.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.54 [info     ] FQE_20220420180807: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001651723700833608, 'time_algorithm_update': 0.004221950668886483, 'loss': 0.024493240904910153, 'time_step': 0.004459378231002624, 'init_value': -5.579463005065918, 'ave_value': -3.3778975771421247, 'soft_opc': nan} step=8134




2022-04-20 18:08.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:08.55 [info     ] FQE_20220420180807: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00017206640128629753, 'time_algorithm_update': 0.005090150488428323, 'loss': 0.025228468074985074, 'time_step': 0.005339957145323236, 'init_value': -5.625576972961426, 'ave_value': -3.423315660789743, 'soft_opc': nan} step=8300




2022-04-20 18:08.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180807/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:08.55 [info     ] Directory is created at d3rlpy_logs/FQE_20220420180855
2022-04-20 18:08.55 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:08.55 [debug    ] Building models...
2022-04-20 18:08.55 [debug    ] Models have been built.
2022-04-20 18:08.55 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420180855/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:08.57 [info     ] FQE_20220420180855: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001675105372140574, 'time_algorithm_update': 0.005076067392216172, 'loss': 0.030079500170337947, 'time_step': 0.0053177330383034635, 'init_value': -1.2259063720703125, 'ave_value': -1.1977965750672797, 'soft_opc': nan} step=344




2022-04-20 18:08.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:08.59 [info     ] FQE_20220420180855: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016885510710782782, 'time_algorithm_update': 0.005070023065389589, 'loss': 0.02471067977603525, 'time_step': 0.00531262852424799, 'init_value': -1.7880691289901733, 'ave_value': -1.775878004850568, 'soft_opc': nan} step=688




2022-04-20 18:08.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.01 [info     ] FQE_20220420180855: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017039512479028037, 'time_algorithm_update': 0.005094607209050378, 'loss': 0.0282047214067736, 'time_step': 0.005342694909073586, 'init_value': -2.6928091049194336, 'ave_value': -2.6690427199744424, 'soft_opc': nan} step=1032




2022-04-20 18:09.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.03 [info     ] FQE_20220420180855: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016994878303172977, 'time_algorithm_update': 0.004554722891297451, 'loss': 0.03284603681112098, 'time_step': 0.004798968864041705, 'init_value': -3.098926544189453, 'ave_value': -3.1005474830365127, 'soft_opc': nan} step=1376




2022-04-20 18:09.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.05 [info     ] FQE_20220420180855: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016552695008211358, 'time_algorithm_update': 0.005031699358030807, 'loss': 0.042754240696816596, 'time_step': 0.005272559648336366, 'init_value': -3.7530934810638428, 'ave_value': -3.8081010945141314, 'soft_opc': nan} step=1720




2022-04-20 18:09.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.07 [info     ] FQE_20220420180855: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001689140186753384, 'time_algorithm_update': 0.005059407200924185, 'loss': 0.05255705312565835, 'time_step': 0.005306627861289091, 'init_value': -4.143427848815918, 'ave_value': -4.270337197174914, 'soft_opc': nan} step=2064




2022-04-20 18:09.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.09 [info     ] FQE_20220420180855: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016951630281847576, 'time_algorithm_update': 0.005064709242000137, 'loss': 0.06502907725354266, 'time_step': 0.005309476408847543, 'init_value': -4.616159439086914, 'ave_value': -4.8562452942741245, 'soft_opc': nan} step=2408




2022-04-20 18:09.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.11 [info     ] FQE_20220420180855: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.000174109325852505, 'time_algorithm_update': 0.004893429750619933, 'loss': 0.0810476788292574, 'time_step': 0.005143934211065603, 'init_value': -4.9983720779418945, 'ave_value': -5.361825867689861, 'soft_opc': nan} step=2752




2022-04-20 18:09.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.13 [info     ] FQE_20220420180855: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001701317554296449, 'time_algorithm_update': 0.004941408024277798, 'loss': 0.09776759046389787, 'time_step': 0.005186508561289588, 'init_value': -5.154415130615234, 'ave_value': -5.655066098183024, 'soft_opc': nan} step=3096




2022-04-20 18:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.15 [info     ] FQE_20220420180855: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001724667327348576, 'time_algorithm_update': 0.005104741384816724, 'loss': 0.1175224118497829, 'time_step': 0.005354784948881282, 'init_value': -5.330663681030273, 'ave_value': -6.032522946348842, 'soft_opc': nan} step=3440




2022-04-20 18:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.17 [info     ] FQE_20220420180855: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001726095066514126, 'time_algorithm_update': 0.005058644815932873, 'loss': 0.13524371403984206, 'time_step': 0.005306095577949701, 'init_value': -5.707095623016357, 'ave_value': -6.6538495395773065, 'soft_opc': nan} step=3784




2022-04-20 18:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.19 [info     ] FQE_20220420180855: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001726088135741478, 'time_algorithm_update': 0.005080576552901157, 'loss': 0.1554674015997714, 'time_step': 0.00532680819200915, 'init_value': -5.886072158813477, 'ave_value': -7.162618105412201, 'soft_opc': nan} step=4128




2022-04-20 18:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.20 [info     ] FQE_20220420180855: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016589982565059217, 'time_algorithm_update': 0.004665393468945525, 'loss': 0.17438905245927705, 'time_step': 0.0049046444338421485, 'init_value': -5.811485290527344, 'ave_value': -7.359721437719575, 'soft_opc': nan} step=4472




2022-04-20 18:09.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.22 [info     ] FQE_20220420180855: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016989264377327852, 'time_algorithm_update': 0.005108258751935737, 'loss': 0.19231777877525189, 'time_step': 0.00535365107447602, 'init_value': -6.219311237335205, 'ave_value': -8.01207901466793, 'soft_opc': nan} step=4816




2022-04-20 18:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.24 [info     ] FQE_20220420180855: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001702925493550855, 'time_algorithm_update': 0.005117455887240033, 'loss': 0.21235945719012686, 'time_step': 0.005363058905268825, 'init_value': -6.053897380828857, 'ave_value': -8.051402743192188, 'soft_opc': nan} step=5160




2022-04-20 18:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.26 [info     ] FQE_20220420180855: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017421190128769984, 'time_algorithm_update': 0.005109778670377509, 'loss': 0.22916950084009144, 'time_step': 0.005357872608096101, 'init_value': -6.035512924194336, 'ave_value': -8.21036080740002, 'soft_opc': nan} step=5504




2022-04-20 18:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.28 [info     ] FQE_20220420180855: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017359159713567688, 'time_algorithm_update': 0.0049713281698005145, 'loss': 0.24123026350469784, 'time_step': 0.005218390808549038, 'init_value': -6.093498706817627, 'ave_value': -8.517288227739895, 'soft_opc': nan} step=5848




2022-04-20 18:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.30 [info     ] FQE_20220420180855: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.000174684579982314, 'time_algorithm_update': 0.00502454680065776, 'loss': 0.25392285828630246, 'time_step': 0.0052738051081812655, 'init_value': -6.118830680847168, 'ave_value': -8.702978205763701, 'soft_opc': nan} step=6192




2022-04-20 18:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.32 [info     ] FQE_20220420180855: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.000175296567207159, 'time_algorithm_update': 0.005111097596412481, 'loss': 0.2680408501142073, 'time_step': 0.0053630935591320655, 'init_value': -6.3216552734375, 'ave_value': -9.119987488435036, 'soft_opc': nan} step=6536




2022-04-20 18:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.34 [info     ] FQE_20220420180855: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017221861107404843, 'time_algorithm_update': 0.005039319742557614, 'loss': 0.27645228419799445, 'time_step': 0.005287370016408521, 'init_value': -6.236174583435059, 'ave_value': -9.153313803536868, 'soft_opc': nan} step=6880




2022-04-20 18:09.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.36 [info     ] FQE_20220420180855: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001728874306346095, 'time_algorithm_update': 0.0051546277001846665, 'loss': 0.28589769913598373, 'time_step': 0.005403374516686728, 'init_value': -6.445003032684326, 'ave_value': -9.458886245828591, 'soft_opc': nan} step=7224




2022-04-20 18:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.38 [info     ] FQE_20220420180855: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017107572666434355, 'time_algorithm_update': 0.00460278918576795, 'loss': 0.2979591306688827, 'time_step': 0.00485080688498741, 'init_value': -6.8441081047058105, 'ave_value': -9.868963296360521, 'soft_opc': nan} step=7568




2022-04-20 18:09.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.40 [info     ] FQE_20220420180855: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016654022904329522, 'time_algorithm_update': 0.0050131997396779615, 'loss': 0.30613404390033944, 'time_step': 0.005255075388176497, 'init_value': -6.93264102935791, 'ave_value': -9.898033043195133, 'soft_opc': nan} step=7912




2022-04-20 18:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.42 [info     ] FQE_20220420180855: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017334139624307322, 'time_algorithm_update': 0.005081065865450127, 'loss': 0.31660714814287805, 'time_step': 0.005332680635674055, 'init_value': -7.040892124176025, 'ave_value': -10.08180102237903, 'soft_opc': nan} step=8256




2022-04-20 18:09.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.44 [info     ] FQE_20220420180855: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017079295114029284, 'time_algorithm_update': 0.005055517651313959, 'loss': 0.3186659173617616, 'time_step': 0.005305285370627115, 'init_value': -6.982967376708984, 'ave_value': -10.012160179709705, 'soft_opc': nan} step=8600




2022-04-20 18:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.46 [info     ] FQE_20220420180855: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017221445261045943, 'time_algorithm_update': 0.004889976839686549, 'loss': 0.3225739248919972, 'time_step': 0.0051404605078142745, 'init_value': -7.24887228012085, 'ave_value': -10.291073767762832, 'soft_opc': nan} step=8944




2022-04-20 18:09.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.48 [info     ] FQE_20220420180855: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001709918643152991, 'time_algorithm_update': 0.004942507937897083, 'loss': 0.32698773669057285, 'time_step': 0.005189766024434289, 'init_value': -7.474059581756592, 'ave_value': -10.438313275671707, 'soft_opc': nan} step=9288




2022-04-20 18:09.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.50 [info     ] FQE_20220420180855: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017219435336977937, 'time_algorithm_update': 0.005102097295051397, 'loss': 0.33555135132506664, 'time_step': 0.005348412796508434, 'init_value': -7.838647842407227, 'ave_value': -10.970821420989326, 'soft_opc': nan} step=9632




2022-04-20 18:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.52 [info     ] FQE_20220420180855: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001702121523923652, 'time_algorithm_update': 0.005108759846798209, 'loss': 0.3363061186933327, 'time_step': 0.005355133566745492, 'init_value': -7.990037441253662, 'ave_value': -11.089325616025441, 'soft_opc': nan} step=9976




2022-04-20 18:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.54 [info     ] FQE_20220420180855: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017457576685173568, 'time_algorithm_update': 0.00503099657768427, 'loss': 0.3393508020387746, 'time_step': 0.005284306614897972, 'init_value': -8.185203552246094, 'ave_value': -11.29876366833745, 'soft_opc': nan} step=10320




2022-04-20 18:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.56 [info     ] FQE_20220420180855: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016730400018913802, 'time_algorithm_update': 0.004589043384374574, 'loss': 0.34237741539254785, 'time_step': 0.004832560239836227, 'init_value': -8.52896499633789, 'ave_value': -11.712829524022528, 'soft_opc': nan} step=10664




2022-04-20 18:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:09.58 [info     ] FQE_20220420180855: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017790669618650924, 'time_algorithm_update': 0.0051301322704137755, 'loss': 0.3419491494572613, 'time_step': 0.0053864308567934255, 'init_value': -8.484481811523438, 'ave_value': -11.566311976182073, 'soft_opc': nan} step=11008




2022-04-20 18:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.00 [info     ] FQE_20220420180855: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017535478569740472, 'time_algorithm_update': 0.0050803755604943565, 'loss': 0.34429711987112843, 'time_step': 0.0053340265917223555, 'init_value': -8.687295913696289, 'ave_value': -11.777540726165354, 'soft_opc': nan} step=11352




2022-04-20 18:10.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.02 [info     ] FQE_20220420180855: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001756278581397478, 'time_algorithm_update': 0.00502303866452949, 'loss': 0.3457827819078121, 'time_step': 0.005276150481645451, 'init_value': -8.989295959472656, 'ave_value': -11.978992057586643, 'soft_opc': nan} step=11696




2022-04-20 18:10.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.03 [info     ] FQE_20220420180855: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017340030781058378, 'time_algorithm_update': 0.004939758500387502, 'loss': 0.34906907573472273, 'time_step': 0.005188223927520042, 'init_value': -9.23537826538086, 'ave_value': -12.231248275346651, 'soft_opc': nan} step=12040




2022-04-20 18:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.05 [info     ] FQE_20220420180855: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001750505247781443, 'time_algorithm_update': 0.0049130105695059136, 'loss': 0.3482071222889042, 'time_step': 0.0051665777383848675, 'init_value': -9.436128616333008, 'ave_value': -12.415856935928979, 'soft_opc': nan} step=12384




2022-04-20 18:10.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.07 [info     ] FQE_20220420180855: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017049354176188624, 'time_algorithm_update': 0.005058712737504826, 'loss': 0.350565858093242, 'time_step': 0.005304672690325005, 'init_value': -9.632822036743164, 'ave_value': -12.591439668744433, 'soft_opc': nan} step=12728




2022-04-20 18:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.09 [info     ] FQE_20220420180855: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017432625903639684, 'time_algorithm_update': 0.005033891561419465, 'loss': 0.35476377766666023, 'time_step': 0.0052850502868031345, 'init_value': -9.91077995300293, 'ave_value': -12.818873783862763, 'soft_opc': nan} step=13072




2022-04-20 18:10.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.11 [info     ] FQE_20220420180855: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001737523910611175, 'time_algorithm_update': 0.005021417556807052, 'loss': 0.3577686964566711, 'time_step': 0.005271976077279379, 'init_value': -9.77386474609375, 'ave_value': -12.75425839204895, 'soft_opc': nan} step=13416




2022-04-20 18:10.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.13 [info     ] FQE_20220420180855: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017191850861837698, 'time_algorithm_update': 0.004708642876425455, 'loss': 0.3556238740281917, 'time_step': 0.004956255125444989, 'init_value': -10.018006324768066, 'ave_value': -12.860178392788663, 'soft_opc': nan} step=13760




2022-04-20 18:10.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.15 [info     ] FQE_20220420180855: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017310505689576615, 'time_algorithm_update': 0.0050263363261555515, 'loss': 0.3623660119943494, 'time_step': 0.005277252474496531, 'init_value': -10.465963363647461, 'ave_value': -13.148617440227184, 'soft_opc': nan} step=14104




2022-04-20 18:10.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.17 [info     ] FQE_20220420180855: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016993006994557935, 'time_algorithm_update': 0.005114190107168153, 'loss': 0.3598196708900464, 'time_step': 0.005360906207284262, 'init_value': -10.389711380004883, 'ave_value': -12.958191060047463, 'soft_opc': nan} step=14448




2022-04-20 18:10.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.19 [info     ] FQE_20220420180855: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016854668772497841, 'time_algorithm_update': 0.005039761232775311, 'loss': 0.3526590269946948, 'time_step': 0.005285168109938156, 'init_value': -10.18639850616455, 'ave_value': -12.753465067620834, 'soft_opc': nan} step=14792




2022-04-20 18:10.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.21 [info     ] FQE_20220420180855: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016973947369775108, 'time_algorithm_update': 0.004945800747982291, 'loss': 0.34374301498864107, 'time_step': 0.00518990671911905, 'init_value': -10.357397079467773, 'ave_value': -12.743322987156409, 'soft_opc': nan} step=15136




2022-04-20 18:10.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.23 [info     ] FQE_20220420180855: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017128572907558706, 'time_algorithm_update': 0.004860622245211934, 'loss': 0.3418214728914981, 'time_step': 0.005108325287353161, 'init_value': -10.44338607788086, 'ave_value': -12.836654540684142, 'soft_opc': nan} step=15480




2022-04-20 18:10.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.25 [info     ] FQE_20220420180855: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001714839491733285, 'time_algorithm_update': 0.005082830440166385, 'loss': 0.34436414370226653, 'time_step': 0.005330543878466584, 'init_value': -10.875144004821777, 'ave_value': -13.171251527025776, 'soft_opc': nan} step=15824




2022-04-20 18:10.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.27 [info     ] FQE_20220420180855: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.000170184429301772, 'time_algorithm_update': 0.0050545182338980746, 'loss': 0.3402762318890932, 'time_step': 0.005301824835843818, 'init_value': -11.25661563873291, 'ave_value': -13.618648845818615, 'soft_opc': nan} step=16168




2022-04-20 18:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.29 [info     ] FQE_20220420180855: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017127463983934978, 'time_algorithm_update': 0.005017995141273321, 'loss': 0.3424630577210337, 'time_step': 0.005262981320536414, 'init_value': -11.56295108795166, 'ave_value': -14.069449594089141, 'soft_opc': nan} step=16512




2022-04-20 18:10.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.31 [info     ] FQE_20220420180855: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017075552496799204, 'time_algorithm_update': 0.004626827184544053, 'loss': 0.3426101678240568, 'time_step': 0.00487387734790181, 'init_value': -11.637677192687988, 'ave_value': -14.09911500201141, 'soft_opc': nan} step=16856




2022-04-20 18:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:10.33 [info     ] FQE_20220420180855: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001762100430422051, 'time_algorithm_update': 0.005063054866568987, 'loss': 0.3465257590679928, 'time_step': 0.005316299061442531, 'init_value': -11.798235893249512, 'ave_value': -14.264269190787564, 'soft_opc': nan} step=17200




2022-04-20 18:10.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180855/model_17200.pt
search iteration:  32
using hyper params:  [0.0007290075161751956, 0.007462917854184908, 8.603224774483691e-05, 3]
2022-04-20 18:10.33 [debug    ] RoundIterator is selected.
2022-04-20 18:10.33 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420181033
2022-04-20 18:10.33 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:10.33 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:10.33 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:10.33 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0007290075161

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.37 [info     ] TD3PlusBC_20220420181033: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003472510834186398, 'time_algorithm_update': 0.008798325968067549, 'critic_loss': 2.2878116823253576, 'actor_loss': 2.4398815827062954, 'time_step': 0.009225330157586706, 'td_error': 0.8206959184995077, 'init_value': -4.416987895965576, 'ave_value': -2.4687990159879707} step=342
2022-04-20 18:10.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.40 [info     ] TD3PlusBC_20220420181033: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00035082039080168074, 'time_algorithm_update': 0.008370110863133481, 'critic_loss': 1.2078048091882851, 'actor_loss': 2.2958430365512243, 'time_step': 0.008797280969675522, 'td_error': 0.8577572581297449, 'init_value': -6.339597225189209, 'ave_value': -3.5624757876598303} step=684
2022-04-20 18:10.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.44 [info     ] TD3PlusBC_20220420181033: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035037283311810407, 'time_algorithm_update': 0.00887204959378605, 'critic_loss': 1.7817455671335523, 'actor_loss': 2.2940860463861834, 'time_step': 0.00929776548642164, 'td_error': 0.9235380193806648, 'init_value': -8.350191116333008, 'ave_value': -4.7005407634415235} step=1026
2022-04-20 18:10.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.48 [info     ] TD3PlusBC_20220420181033: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003570994438483701, 'time_algorithm_update': 0.008928476718434115, 'critic_loss': 2.4814965034786023, 'actor_loss': 2.291151119254486, 'time_step': 0.009365292320474547, 'td_error': 1.0142019724154263, 'init_value': -10.47754192352295, 'ave_value': -5.854120358601003} step=1368
2022-04-20 18:10.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.51 [info     ] TD3PlusBC_20220420181033: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035407877804940205, 'time_algorithm_update': 0.008474554931908324, 'critic_loss': 3.3434122349435125, 'actor_loss': 2.2970225978315923, 'time_step': 0.008906759713825426, 'td_error': 1.12401633352461, 'init_value': -12.669821739196777, 'ave_value': -7.2159793256128815} step=1710
2022-04-20 18:10.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.55 [info     ] TD3PlusBC_20220420181033: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035948362963938573, 'time_algorithm_update': 0.008831875366077088, 'critic_loss': 4.389720528097878, 'actor_loss': 2.2975144288693254, 'time_step': 0.009270348744085657, 'td_error': 1.2661926325203403, 'init_value': -14.801895141601562, 'ave_value': -8.372029852601699} step=2052
2022-04-20 18:10.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.58 [info     ] TD3PlusBC_20220420181033: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035129165091709783, 'time_algorithm_update': 0.008487393284401698, 'critic_loss': 5.2984674147695126, 'actor_loss': 2.291552518543444, 'time_step': 0.008915819619831285, 'td_error': 1.4299931820664584, 'init_value': -17.012935638427734, 'ave_value': -9.563491930435244} step=2394
2022-04-20 18:10.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.02 [info     ] TD3PlusBC_20220420181033: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003592242971498367, 'time_algorithm_update': 0.008944226984392133, 'critic_loss': 6.56409143843846, 'actor_loss': 2.2952004134306434, 'time_step': 0.009383467902914125, 'td_error': 1.5869127901448414, 'init_value': -18.8378963470459, 'ave_value': -10.76847735297518} step=2736
2022-04-20 18:11.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.06 [info     ] TD3PlusBC_20220420181033: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000356633760775739, 'time_algorithm_update': 0.008957609795687492, 'critic_loss': 7.902418211189627, 'actor_loss': 2.291729552006861, 'time_step': 0.009392775290193613, 'td_error': 1.719586272822724, 'init_value': -21.1129207611084, 'ave_value': -12.011383429437473} step=3078
2022-04-20 18:11.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.10 [info     ] TD3PlusBC_20220420181033: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003527040370026527, 'time_algorithm_update': 0.008587625291612413, 'critic_loss': 9.177484266939219, 'actor_loss': 2.2941795998846577, 'time_step': 0.00901855781064396, 'td_error': 1.9079422238083912, 'init_value': -23.22358512878418, 'ave_value': -13.067566795584638} step=3420
2022-04-20 18:11.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.13 [info     ] TD3PlusBC_20220420181033: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00035432695645337914, 'time_algorithm_update': 0.0088104964696873, 'critic_loss': 10.665396998500267, 'actor_loss': 2.290198593808894, 'time_step': 0.009238949296070121, 'td_error': 2.1224242890973675, 'init_value': -25.12353515625, 'ave_value': -14.229188633673061} step=3762
2022-04-20 18:11.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.17 [info     ] TD3PlusBC_20220420181033: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003520661627340038, 'time_algorithm_update': 0.008706863163507472, 'critic_loss': 12.495433408614488, 'actor_loss': 2.291871848859285, 'time_step': 0.009127892945942125, 'td_error': 2.2356891887796797, 'init_value': -27.793140411376953, 'ave_value': -15.431992280468647} step=4104
2022-04-20 18:11.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.20 [info     ] TD3PlusBC_20220420181033: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003584930074145222, 'time_algorithm_update': 0.008976761360614621, 'critic_loss': 14.210244938644053, 'actor_loss': 2.2895046228553815, 'time_step': 0.009408086364032233, 'td_error': 2.509517938957627, 'init_value': -28.6992130279541, 'ave_value': -16.41794535336932} step=4446
2022-04-20 18:11.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.24 [info     ] TD3PlusBC_20220420181033: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003528462515936957, 'time_algorithm_update': 0.008984712132236413, 'critic_loss': 16.182030224660682, 'actor_loss': 2.295128812566835, 'time_step': 0.009415892829671937, 'td_error': 2.668966045869097, 'init_value': -31.343231201171875, 'ave_value': -17.479348416914828} step=4788
2022-04-20 18:11.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.28 [info     ] TD3PlusBC_20220420181033: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035489930046929256, 'time_algorithm_update': 0.008448216650221083, 'critic_loss': 18.099352892379315, 'actor_loss': 2.295344023676644, 'time_step': 0.00887522920530442, 'td_error': 2.931961992563238, 'init_value': -32.39117431640625, 'ave_value': -18.53814935678717} step=5130
2022-04-20 18:11.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.32 [info     ] TD3PlusBC_20220420181033: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003543443847120854, 'time_algorithm_update': 0.00885626516844097, 'critic_loss': 20.010888874879356, 'actor_loss': 2.2932039380770677, 'time_step': 0.009282852474011873, 'td_error': 3.1881545839256473, 'init_value': -35.917083740234375, 'ave_value': -19.7730351432837} step=5472
2022-04-20 18:11.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.35 [info     ] TD3PlusBC_20220420181033: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00035066283934297616, 'time_algorithm_update': 0.00871467660045066, 'critic_loss': 22.246877093064157, 'actor_loss': 2.2934524594691763, 'time_step': 0.009136748592755949, 'td_error': 3.277400703520696, 'init_value': -35.74296188354492, 'ave_value': -20.391083264914553} step=5814
2022-04-20 18:11.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.39 [info     ] TD3PlusBC_20220420181033: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003508482760156107, 'time_algorithm_update': 0.00898169007217675, 'critic_loss': 24.520105041258518, 'actor_loss': 2.2967429997628197, 'time_step': 0.009403992117496958, 'td_error': 3.567052508849224, 'init_value': -38.393470764160156, 'ave_value': -21.442183162355544} step=6156
2022-04-20 18:11.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.43 [info     ] TD3PlusBC_20220420181033: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035342417265239517, 'time_algorithm_update': 0.008957572847779035, 'critic_loss': 26.82659125746342, 'actor_loss': 2.291343352948016, 'time_step': 0.009385520254659373, 'td_error': 3.7801836746532653, 'init_value': -39.78273010253906, 'ave_value': -22.42611550665641} step=6498
2022-04-20 18:11.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.46 [info     ] TD3PlusBC_20220420181033: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035681710605732877, 'time_algorithm_update': 0.008411168354993675, 'critic_loss': 29.24716025625753, 'actor_loss': 2.2920230118154783, 'time_step': 0.008840422184146636, 'td_error': 3.968073442015697, 'init_value': -40.58366394042969, 'ave_value': -23.18137359063376} step=6840
2022-04-20 18:11.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.50 [info     ] TD3PlusBC_20220420181033: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003573030059100592, 'time_algorithm_update': 0.00894272187997026, 'critic_loss': 31.90364568275318, 'actor_loss': 2.2927147951739575, 'time_step': 0.009377063366404752, 'td_error': 4.350898811957198, 'init_value': -43.553016662597656, 'ave_value': -24.388135791652424} step=7182
2022-04-20 18:11.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.54 [info     ] TD3PlusBC_20220420181033: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003603097291020622, 'time_algorithm_update': 0.008852133973997239, 'critic_loss': 34.30580321808308, 'actor_loss': 2.296849300986842, 'time_step': 0.009288007055806834, 'td_error': 4.274621527505108, 'init_value': -42.91203689575195, 'ave_value': -24.818506841235248} step=7524
2022-04-20 18:11.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.57 [info     ] TD3PlusBC_20220420181033: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003586972666065595, 'time_algorithm_update': 0.008617647907190155, 'critic_loss': 36.86321295911109, 'actor_loss': 2.2944786576499716, 'time_step': 0.009053726642452485, 'td_error': 4.552328532994113, 'init_value': -44.512855529785156, 'ave_value': -25.67076454455788} step=7866
2022-04-20 18:11.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.01 [info     ] TD3PlusBC_20220420181033: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003556877548931635, 'time_algorithm_update': 0.008797427367048654, 'critic_loss': 39.04616021552281, 'actor_loss': 2.2928818192398337, 'time_step': 0.009226326356854355, 'td_error': 4.914118076976522, 'init_value': -46.594757080078125, 'ave_value': -26.696610084789228} step=8208
2022-04-20 18:12.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.04 [info     ] TD3PlusBC_20220420181033: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00035894614214088485, 'time_algorithm_update': 0.007938005770856177, 'critic_loss': 41.61238060778344, 'actor_loss': 2.29826213323582, 'time_step': 0.008372488774751363, 'td_error': 5.058435106838637, 'init_value': -47.86588668823242, 'ave_value': -27.42288777880127} step=8550
2022-04-20 18:12.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.07 [info     ] TD3PlusBC_20220420181033: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003503561019897461, 'time_algorithm_update': 0.006826808578089664, 'critic_loss': 44.05614018579673, 'actor_loss': 2.296033733769467, 'time_step': 0.007253014553360074, 'td_error': 5.51354810966988, 'init_value': -50.46076583862305, 'ave_value': -28.502059109301666} step=8892
2022-04-20 18:12.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.10 [info     ] TD3PlusBC_20220420181033: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035073882655093546, 'time_algorithm_update': 0.006736776285004197, 'critic_loss': 46.29254278662609, 'actor_loss': 2.2938762812586555, 'time_step': 0.007164080240573102, 'td_error': 5.434370514148361, 'init_value': -49.95235061645508, 'ave_value': -28.878595402940924} step=9234
2022-04-20 18:12.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.13 [info     ] TD3PlusBC_20220420181033: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003534471779538874, 'time_algorithm_update': 0.006770987956844575, 'critic_loss': 48.5922729425263, 'actor_loss': 2.293226866694222, 'time_step': 0.0071970754199557835, 'td_error': 5.7048159077862115, 'init_value': -51.01162338256836, 'ave_value': -29.770868137259676} step=9576
2022-04-20 18:12.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.16 [info     ] TD3PlusBC_20220420181033: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034952860826637313, 'time_algorithm_update': 0.0068537324492694346, 'critic_loss': 51.16922984206886, 'actor_loss': 2.2966276553639196, 'time_step': 0.007278283437093099, 'td_error': 5.923247156166798, 'init_value': -52.64741897583008, 'ave_value': -30.61771235789911} step=9918
2022-04-20 18:12.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.19 [info     ] TD3PlusBC_20220420181033: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003551370219180458, 'time_algorithm_update': 0.006831077804342348, 'critic_loss': 53.67343393961588, 'actor_loss': 2.298505781686794, 'time_step': 0.007262048665543049, 'td_error': 6.226132564105153, 'init_value': -54.112945556640625, 'ave_value': -31.34966804806294} step=10260
2022-04-20 18:12.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.21 [info     ] TD3PlusBC_20220420181033: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035447823373894945, 'time_algorithm_update': 0.006796509898894015, 'critic_loss': 55.62290404693425, 'actor_loss': 2.296628185182984, 'time_step': 0.0072292563510917085, 'td_error': 6.427285462744071, 'init_value': -54.873130798339844, 'ave_value': -31.835777830425734} step=10602
2022-04-20 18:12.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.24 [info     ] TD3PlusBC_20220420181033: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035509658835784736, 'time_algorithm_update': 0.006880667474534776, 'critic_loss': 57.787159635309585, 'actor_loss': 2.2978312732183444, 'time_step': 0.007314492387381214, 'td_error': 6.5523881248911255, 'init_value': -55.54560470581055, 'ave_value': -32.46101763957826} step=10944
2022-04-20 18:12.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.27 [info     ] TD3PlusBC_20220420181033: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00034735077305843954, 'time_algorithm_update': 0.006753193007575141, 'critic_loss': 60.067952295492965, 'actor_loss': 2.299432840960765, 'time_step': 0.007175259422837642, 'td_error': 6.791424997427476, 'init_value': -56.13899612426758, 'ave_value': -33.22363813180602} step=11286
2022-04-20 18:12.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.30 [info     ] TD3PlusBC_20220420181033: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035822600649114243, 'time_algorithm_update': 0.006921914585849695, 'critic_loss': 62.43737384729218, 'actor_loss': 2.29924035769457, 'time_step': 0.007354606661880226, 'td_error': 6.979724366820767, 'init_value': -57.2148551940918, 'ave_value': -33.685205133292186} step=11628
2022-04-20 18:12.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.33 [info     ] TD3PlusBC_20220420181033: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035172247747231646, 'time_algorithm_update': 0.006832285931235866, 'critic_loss': 64.48050060606839, 'actor_loss': 2.297940934610646, 'time_step': 0.007258812586466472, 'td_error': 7.1786475925667235, 'init_value': -58.05598831176758, 'ave_value': -34.37810113229017} step=11970
2022-04-20 18:12.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.36 [info     ] TD3PlusBC_20220420181033: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035156910879570143, 'time_algorithm_update': 0.0068621858518723156, 'critic_loss': 66.57269826967116, 'actor_loss': 2.3039939361706114, 'time_step': 0.007291436892503883, 'td_error': 7.180339307519216, 'init_value': -59.72748565673828, 'ave_value': -35.21978526177522} step=12312
2022-04-20 18:12.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.39 [info     ] TD3PlusBC_20220420181033: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003509814279121265, 'time_algorithm_update': 0.0068415549763462, 'critic_loss': 68.81625554714984, 'actor_loss': 2.29586326587967, 'time_step': 0.007266551430462397, 'td_error': 7.552756964882628, 'init_value': -60.5985107421875, 'ave_value': -35.73646692539272} step=12654
2022-04-20 18:12.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.42 [info     ] TD3PlusBC_20220420181033: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035615901500858063, 'time_algorithm_update': 0.006891001037686889, 'critic_loss': 71.48294308311061, 'actor_loss': 2.301697704527113, 'time_step': 0.007323064302143298, 'td_error': 7.643287898157132, 'init_value': -61.04761505126953, 'ave_value': -36.09091539352344} step=12996
2022-04-20 18:12.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.45 [info     ] TD3PlusBC_20220420181033: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003528086065548902, 'time_algorithm_update': 0.0068520865245172155, 'critic_loss': 73.20534687153777, 'actor_loss': 2.3023340241950856, 'time_step': 0.007280501705861231, 'td_error': 7.8887827956931025, 'init_value': -62.738502502441406, 'ave_value': -36.921820403088205} step=13338
2022-04-20 18:12.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.47 [info     ] TD3PlusBC_20220420181033: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00035712732906230014, 'time_algorithm_update': 0.006371166970994737, 'critic_loss': 75.13532636318988, 'actor_loss': 2.3005994551363047, 'time_step': 0.006807551049349601, 'td_error': 7.853940008962742, 'init_value': -62.3428840637207, 'ave_value': -37.26368306506291} step=13680
2022-04-20 18:12.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.50 [info     ] TD3PlusBC_20220420181033: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003534555435180664, 'time_algorithm_update': 0.006844731102212828, 'critic_loss': 76.92479422496773, 'actor_loss': 2.3003567756965144, 'time_step': 0.007275197241041396, 'td_error': 8.1423361288909, 'init_value': -63.773155212402344, 'ave_value': -37.883787995912556} step=14022
2022-04-20 18:12.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.53 [info     ] TD3PlusBC_20220420181033: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003502048247041758, 'time_algorithm_update': 0.006866195048505103, 'critic_loss': 78.7324562741999, 'actor_loss': 2.2991338306003146, 'time_step': 0.007293552683110823, 'td_error': 8.385931298046867, 'init_value': -63.952064514160156, 'ave_value': -38.36033295172611} step=14364
2022-04-20 18:12.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.56 [info     ] TD3PlusBC_20220420181033: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035095981687133075, 'time_algorithm_update': 0.006831157277202049, 'critic_loss': 80.58430187604581, 'actor_loss': 2.3023119363171314, 'time_step': 0.007260669044583862, 'td_error': 8.475764814946718, 'init_value': -64.02630615234375, 'ave_value': -38.725720366299456} step=14706
2022-04-20 18:12.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.59 [info     ] TD3PlusBC_20220420181033: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00035001659951014826, 'time_algorithm_update': 0.006937651606331094, 'critic_loss': 81.85612840262073, 'actor_loss': 2.2997749618619507, 'time_step': 0.007364044412534836, 'td_error': 8.492315677138853, 'init_value': -63.8005485534668, 'ave_value': -39.10071842611977} step=15048
2022-04-20 18:12.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.02 [info     ] TD3PlusBC_20220420181033: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003629190879955626, 'time_algorithm_update': 0.007015853597406755, 'critic_loss': 83.85550852546915, 'actor_loss': 2.3022274413304022, 'time_step': 0.007456654693648132, 'td_error': 8.82259758930485, 'init_value': -66.36956024169922, 'ave_value': -39.89134393317323} step=15390
2022-04-20 18:13.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.05 [info     ] TD3PlusBC_20220420181033: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035238823695489535, 'time_algorithm_update': 0.006896624091075875, 'critic_loss': 85.6014071793584, 'actor_loss': 2.3049252270257963, 'time_step': 0.007324678856029845, 'td_error': 8.924639329959977, 'init_value': -65.86322021484375, 'ave_value': -40.316981958575916} step=15732
2022-04-20 18:13.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.08 [info     ] TD3PlusBC_20220420181033: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003565940243458887, 'time_algorithm_update': 0.006868866451999597, 'critic_loss': 87.34846329270748, 'actor_loss': 2.3063697912539656, 'time_step': 0.007302981371070907, 'td_error': 9.454703686715007, 'init_value': -67.67548370361328, 'ave_value': -40.950900233640496} step=16074
2022-04-20 18:13.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.11 [info     ] TD3PlusBC_20220420181033: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00035126515996386433, 'time_algorithm_update': 0.006875059758013452, 'critic_loss': 88.8978689762584, 'actor_loss': 2.3063188449681156, 'time_step': 0.007300160781681886, 'td_error': 9.599675337073837, 'init_value': -67.9168472290039, 'ave_value': -41.139282409925734} step=16416
2022-04-20 18:13.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.14 [info     ] TD3PlusBC_20220420181033: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00034750623312609935, 'time_algorithm_update': 0.006835535255789059, 'critic_loss': 90.53542305572688, 'actor_loss': 2.3000359130881685, 'time_step': 0.007259372382136116, 'td_error': 9.521599412446193, 'init_value': -67.44927215576172, 'ave_value': -41.47820262732521} step=16758
2022-04-20 18:13.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:13.17 [info     ] TD3PlusBC_20220420181033: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003523638373927066, 'time_algorithm_update': 0.006833876782690572, 'critic_loss': 92.27618854366548, 'actor_loss': 2.303029988941393, 'time_step': 0.0072589143674973156, 'td_error': 9.973708978954074, 'init_value': -70.30715942382812, 'ave_value': -42.259223293975616} step=17100
2022-04-20 18:13.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181033/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:13.17 [info     ] FQE_20220420181317: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.0001553088258215263, 'time_algorithm_update': 0.0034927392410019696, 'loss': 0.00775272001796481, 'time_step': 0.003719290771053336, 'init_value': -0.5535033941268921, 'ave_value': -0.47416924789503173, 'soft_opc': nan} step=177




2022-04-20 18:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.18 [info     ] FQE_20220420181317: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015594999668961865, 'time_algorithm_update': 0.003472644730476336, 'loss': 0.006301171035837319, 'time_step': 0.0036979179597843837, 'init_value': -0.651470422744751, 'ave_value': -0.5217783065798046, 'soft_opc': nan} step=354




2022-04-20 18:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.19 [info     ] FQE_20220420181317: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015655210462667173, 'time_algorithm_update': 0.00345289774533719, 'loss': 0.005650217415985919, 'time_step': 0.0036762797899838897, 'init_value': -0.7252230048179626, 'ave_value': -0.5692429588721679, 'soft_opc': nan} step=531




2022-04-20 18:13.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.20 [info     ] FQE_20220420181317: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015531690780725855, 'time_algorithm_update': 0.0033950697904252735, 'loss': 0.005169275160194875, 'time_step': 0.00362434494966841, 'init_value': -0.7286829948425293, 'ave_value': -0.5691552014501245, 'soft_opc': nan} step=708




2022-04-20 18:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.20 [info     ] FQE_20220420181317: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00015700604282530014, 'time_algorithm_update': 0.0034024486433988236, 'loss': 0.0047251916172161785, 'time_step': 0.003627934698331154, 'init_value': -0.7123517990112305, 'ave_value': -0.549592365785404, 'soft_opc': nan} step=885




2022-04-20 18:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.21 [info     ] FQE_20220420181317: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00015574929404393428, 'time_algorithm_update': 0.0035571095633641476, 'loss': 0.00450644856923928, 'time_step': 0.0037813402165127337, 'init_value': -0.7418403625488281, 'ave_value': -0.588153630074438, 'soft_opc': nan} step=1062




2022-04-20 18:13.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.22 [info     ] FQE_20220420181317: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00015708551568500067, 'time_algorithm_update': 0.0034044947327867064, 'loss': 0.004092866204952621, 'time_step': 0.0036338372419109453, 'init_value': -0.7369855046272278, 'ave_value': -0.5810603122990411, 'soft_opc': nan} step=1239




2022-04-20 18:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.22 [info     ] FQE_20220420181317: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015841096134509071, 'time_algorithm_update': 0.0035652966149109233, 'loss': 0.003923356194985704, 'time_step': 0.003789792626591052, 'init_value': -0.7757338881492615, 'ave_value': -0.5948863924578861, 'soft_opc': nan} step=1416




2022-04-20 18:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.23 [info     ] FQE_20220420181317: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016034524993034405, 'time_algorithm_update': 0.003508034399000265, 'loss': 0.004061851568262245, 'time_step': 0.003742724488684013, 'init_value': -0.8023774027824402, 'ave_value': -0.6185358732603154, 'soft_opc': nan} step=1593




2022-04-20 18:13.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.24 [info     ] FQE_20220420181317: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001643498738606771, 'time_algorithm_update': 0.0035399313026902367, 'loss': 0.004333620094483729, 'time_step': 0.003776903206345725, 'init_value': -0.8541523814201355, 'ave_value': -0.6588464654109499, 'soft_opc': nan} step=1770




2022-04-20 18:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.25 [info     ] FQE_20220420181317: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015909119514422227, 'time_algorithm_update': 0.003532246681256483, 'loss': 0.004628456917217812, 'time_step': 0.0037623825719801047, 'init_value': -0.9009764194488525, 'ave_value': -0.6901873136649619, 'soft_opc': nan} step=1947




2022-04-20 18:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.25 [info     ] FQE_20220420181317: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015149951654638946, 'time_algorithm_update': 0.0034121227803203345, 'loss': 0.004739955926608652, 'time_step': 0.0036326545779987915, 'init_value': -1.0140371322631836, 'ave_value': -0.756536778043564, 'soft_opc': nan} step=2124




2022-04-20 18:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.26 [info     ] FQE_20220420181317: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.0001587638747220659, 'time_algorithm_update': 0.003494363720134153, 'loss': 0.005275736801000049, 'time_step': 0.00372522160158319, 'init_value': -1.0403376817703247, 'ave_value': -0.7861461589554767, 'soft_opc': nan} step=2301




2022-04-20 18:13.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.27 [info     ] FQE_20220420181317: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00015696024490615069, 'time_algorithm_update': 0.003380134280792064, 'loss': 0.006076688947955851, 'time_step': 0.0036064420042738405, 'init_value': -1.1123229265213013, 'ave_value': -0.8317199371285267, 'soft_opc': nan} step=2478




2022-04-20 18:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.28 [info     ] FQE_20220420181317: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015716498854470118, 'time_algorithm_update': 0.0035415086369056485, 'loss': 0.006360885916714488, 'time_step': 0.0037684615722483835, 'init_value': -1.2055963277816772, 'ave_value': -0.898759114981682, 'soft_opc': nan} step=2655




2022-04-20 18:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.28 [info     ] FQE_20220420181317: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.0001578465693414548, 'time_algorithm_update': 0.003503656656728626, 'loss': 0.006529203785772798, 'time_step': 0.0037346559729279773, 'init_value': -1.2597745656967163, 'ave_value': -0.9403441964248727, 'soft_opc': nan} step=2832




2022-04-20 18:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.29 [info     ] FQE_20220420181317: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015924744686837924, 'time_algorithm_update': 0.003500755223850746, 'loss': 0.007278842879801273, 'time_step': 0.003732387628932457, 'init_value': -1.379351258277893, 'ave_value': -1.0142269400504975, 'soft_opc': nan} step=3009




2022-04-20 18:13.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.30 [info     ] FQE_20220420181317: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015941178057826845, 'time_algorithm_update': 0.003513375244571664, 'loss': 0.00749830231478301, 'time_step': 0.0037465822898735436, 'init_value': -1.4043045043945312, 'ave_value': -1.0400992857152456, 'soft_opc': nan} step=3186




2022-04-20 18:13.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.30 [info     ] FQE_20220420181317: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001592905507922846, 'time_algorithm_update': 0.0035533500930010264, 'loss': 0.008013389257472114, 'time_step': 0.0037863914575953944, 'init_value': -1.4408421516418457, 'ave_value': -1.0446955580253143, 'soft_opc': nan} step=3363




2022-04-20 18:13.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.31 [info     ] FQE_20220420181317: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001613932140802933, 'time_algorithm_update': 0.0034469386278572728, 'loss': 0.008535716746141871, 'time_step': 0.0036779743129924194, 'init_value': -1.5375070571899414, 'ave_value': -1.1038994929051256, 'soft_opc': nan} step=3540




2022-04-20 18:13.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.32 [info     ] FQE_20220420181317: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.0001600408284677624, 'time_algorithm_update': 0.0035698481198758055, 'loss': 0.009152293908357157, 'time_step': 0.0038017310665152165, 'init_value': -1.589410662651062, 'ave_value': -1.1247742527508522, 'soft_opc': nan} step=3717




2022-04-20 18:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.33 [info     ] FQE_20220420181317: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001631268000198623, 'time_algorithm_update': 0.0034738853152862376, 'loss': 0.009673925360012273, 'time_step': 0.0037108949348751436, 'init_value': -1.6612480878829956, 'ave_value': -1.1748562578391564, 'soft_opc': nan} step=3894




2022-04-20 18:13.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.33 [info     ] FQE_20220420181317: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016755438120351674, 'time_algorithm_update': 0.0035242509033720374, 'loss': 0.01021848748027647, 'time_step': 0.0037625886626162773, 'init_value': -1.7310336828231812, 'ave_value': -1.2035585478194304, 'soft_opc': nan} step=4071




2022-04-20 18:13.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.34 [info     ] FQE_20220420181317: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016167204258805615, 'time_algorithm_update': 0.0035137012179961985, 'loss': 0.01070389398018498, 'time_step': 0.003745523549742618, 'init_value': -1.8178008794784546, 'ave_value': -1.258050187198339, 'soft_opc': nan} step=4248




2022-04-20 18:13.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.35 [info     ] FQE_20220420181317: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015726870736159848, 'time_algorithm_update': 0.0035111944554215772, 'loss': 0.011490399695375124, 'time_step': 0.0037349496184095827, 'init_value': -1.9457017183303833, 'ave_value': -1.330036481722697, 'soft_opc': nan} step=4425




2022-04-20 18:13.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.35 [info     ] FQE_20220420181317: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00016166800159519003, 'time_algorithm_update': 0.0035069891288455595, 'loss': 0.012810146443330088, 'time_step': 0.0037424214142190533, 'init_value': -1.9408296346664429, 'ave_value': -1.2841407150142006, 'soft_opc': nan} step=4602




2022-04-20 18:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.36 [info     ] FQE_20220420181317: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001591639330158126, 'time_algorithm_update': 0.0035414197350625938, 'loss': 0.0130457489667092, 'time_step': 0.003771574483752924, 'init_value': -2.037248134613037, 'ave_value': -1.3426979289611092, 'soft_opc': nan} step=4779




2022-04-20 18:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.37 [info     ] FQE_20220420181317: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00015911409410379702, 'time_algorithm_update': 0.0035209480652027884, 'loss': 0.013664699367400111, 'time_step': 0.0037551276427877827, 'init_value': -2.1306612491607666, 'ave_value': -1.3726710181940605, 'soft_opc': nan} step=4956




2022-04-20 18:13.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.38 [info     ] FQE_20220420181317: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00015722425644007105, 'time_algorithm_update': 0.003500357859552243, 'loss': 0.01404569058160766, 'time_step': 0.003727242098016254, 'init_value': -2.1818950176239014, 'ave_value': -1.4019462704032033, 'soft_opc': nan} step=5133




2022-04-20 18:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.38 [info     ] FQE_20220420181317: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016064158940719346, 'time_algorithm_update': 0.003637234369913737, 'loss': 0.01511292032487059, 'time_step': 0.0038665768790379757, 'init_value': -2.2626123428344727, 'ave_value': -1.431478612162956, 'soft_opc': nan} step=5310




2022-04-20 18:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.39 [info     ] FQE_20220420181317: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001600058065295893, 'time_algorithm_update': 0.0034840268603825975, 'loss': 0.015601227979444094, 'time_step': 0.003715804741207489, 'init_value': -2.31748104095459, 'ave_value': -1.4633884947802926, 'soft_opc': nan} step=5487




2022-04-20 18:13.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.40 [info     ] FQE_20220420181317: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.0001589026154771363, 'time_algorithm_update': 0.0034634420427225405, 'loss': 0.01659008070345084, 'time_step': 0.0036950138329112595, 'init_value': -2.3416874408721924, 'ave_value': -1.445868684382768, 'soft_opc': nan} step=5664




2022-04-20 18:13.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.41 [info     ] FQE_20220420181317: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00015807151794433594, 'time_algorithm_update': 0.003462709276016149, 'loss': 0.017282399846475026, 'time_step': 0.003690733074468408, 'init_value': -2.3921425342559814, 'ave_value': -1.500932632509742, 'soft_opc': nan} step=5841




2022-04-20 18:13.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.41 [info     ] FQE_20220420181317: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015438747944804908, 'time_algorithm_update': 0.0035062226871986173, 'loss': 0.018151475934148634, 'time_step': 0.0037305435891878806, 'init_value': -2.528095006942749, 'ave_value': -1.542500701383338, 'soft_opc': nan} step=6018




2022-04-20 18:13.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.42 [info     ] FQE_20220420181317: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016565107356357036, 'time_algorithm_update': 0.0035428084896109197, 'loss': 0.018798860037925594, 'time_step': 0.003780959016185696, 'init_value': -2.573138952255249, 'ave_value': -1.545696587348217, 'soft_opc': nan} step=6195




2022-04-20 18:13.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.43 [info     ] FQE_20220420181317: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016186331625038622, 'time_algorithm_update': 0.0035783867378019344, 'loss': 0.019626814833221335, 'time_step': 0.0038128572668732898, 'init_value': -2.705507278442383, 'ave_value': -1.6528489088477405, 'soft_opc': nan} step=6372




2022-04-20 18:13.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.43 [info     ] FQE_20220420181317: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00015966771012645658, 'time_algorithm_update': 0.0035401198823573226, 'loss': 0.021294192666198405, 'time_step': 0.0037705723175221243, 'init_value': -2.7298195362091064, 'ave_value': -1.664296601502387, 'soft_opc': nan} step=6549




2022-04-20 18:13.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.44 [info     ] FQE_20220420181317: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016113997852734927, 'time_algorithm_update': 0.0034868663313698633, 'loss': 0.02179962131036919, 'time_step': 0.003715319822063554, 'init_value': -2.951443910598755, 'ave_value': -1.826623114245432, 'soft_opc': nan} step=6726




2022-04-20 18:13.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.45 [info     ] FQE_20220420181317: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016280217359295, 'time_algorithm_update': 0.003466055218109303, 'loss': 0.022422315370644007, 'time_step': 0.003701614121259269, 'init_value': -2.905529022216797, 'ave_value': -1.753832608955848, 'soft_opc': nan} step=6903




2022-04-20 18:13.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.46 [info     ] FQE_20220420181317: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016384070875954493, 'time_algorithm_update': 0.0035293977812858625, 'loss': 0.02196545427399398, 'time_step': 0.0037659655856547383, 'init_value': -2.960477113723755, 'ave_value': -1.7754527331170467, 'soft_opc': nan} step=7080




2022-04-20 18:13.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.46 [info     ] FQE_20220420181317: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016113593753448315, 'time_algorithm_update': 0.003386440923658468, 'loss': 0.02372993086464703, 'time_step': 0.0036176840464274087, 'init_value': -2.9394266605377197, 'ave_value': -1.8157058064051128, 'soft_opc': nan} step=7257




2022-04-20 18:13.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.47 [info     ] FQE_20220420181317: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016873435112042617, 'time_algorithm_update': 0.0036285125603110102, 'loss': 0.024587095206692857, 'time_step': 0.003872560242475089, 'init_value': -3.066737174987793, 'ave_value': -1.8848903835703572, 'soft_opc': nan} step=7434




2022-04-20 18:13.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.48 [info     ] FQE_20220420181317: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00015987784175549522, 'time_algorithm_update': 0.0035707559962730624, 'loss': 0.024859064054743803, 'time_step': 0.0038010548737089514, 'init_value': -3.1470251083374023, 'ave_value': -1.9493711686561683, 'soft_opc': nan} step=7611




2022-04-20 18:13.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.49 [info     ] FQE_20220420181317: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016230513147041622, 'time_algorithm_update': 0.003659422114744025, 'loss': 0.026469238679250967, 'time_step': 0.003894937913970085, 'init_value': -3.1405603885650635, 'ave_value': -1.896081373770092, 'soft_opc': nan} step=7788




2022-04-20 18:13.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.49 [info     ] FQE_20220420181317: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016053113560218595, 'time_algorithm_update': 0.0035690870662193515, 'loss': 0.02706399731703713, 'time_step': 0.003802516366128868, 'init_value': -3.180036783218384, 'ave_value': -1.8985653738449286, 'soft_opc': nan} step=7965




2022-04-20 18:13.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.50 [info     ] FQE_20220420181317: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016346489642299502, 'time_algorithm_update': 0.0035699733906546555, 'loss': 0.026919082260958977, 'time_step': 0.003806464416159075, 'init_value': -3.256697416305542, 'ave_value': -1.9240149506890738, 'soft_opc': nan} step=8142




2022-04-20 18:13.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.51 [info     ] FQE_20220420181317: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015997347858666026, 'time_algorithm_update': 0.00349564740886796, 'loss': 0.0275130061074923, 'time_step': 0.003729804087493379, 'init_value': -3.270798683166504, 'ave_value': -1.9201079862007686, 'soft_opc': nan} step=8319




2022-04-20 18:13.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.51 [info     ] FQE_20220420181317: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016451824856343242, 'time_algorithm_update': 0.0034627200519971256, 'loss': 0.02788329198640787, 'time_step': 0.003695972895218154, 'init_value': -3.2819273471832275, 'ave_value': -1.9424691790485524, 'soft_opc': nan} step=8496




2022-04-20 18:13.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.52 [info     ] FQE_20220420181317: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015979028691006245, 'time_algorithm_update': 0.0035809743202338783, 'loss': 0.02815828368128251, 'time_step': 0.0038110428610763983, 'init_value': -3.2752373218536377, 'ave_value': -1.8993970437696925, 'soft_opc': nan} step=8673




2022-04-20 18:13.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:13.53 [info     ] FQE_20220420181317: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016513382647670595, 'time_algorithm_update': 0.0035242522503696593, 'loss': 0.02770019325988083, 'time_step': 0.003762770507295253, 'init_value': -3.2492847442626953, 'ave_value': -1.8436981802722354, 'soft_opc': nan} step=8850




2022-04-20 18:13.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181317/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:13.53 [info     ] Directory is created at d3rlpy_logs/FQE_20220420181353
2022-04-20 18:13.53 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:13.53 [debug    ] Building models...
2022-04-20 18:13.53 [debug    ] Models have been built.
2022-04-20 18:13.53 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420181353/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:13.55 [info     ] FQE_20220420181353: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015944589016049406, 'time_algorithm_update': 0.0035106927849525627, 'loss': 0.027740520335798866, 'time_step': 0.0037426685178002647, 'init_value': -1.1234936714172363, 'ave_value': -1.1063135840871312, 'soft_opc': nan} step=344




2022-04-20 18:13.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.56 [info     ] FQE_20220420181353: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016194235446841219, 'time_algorithm_update': 0.0034738737483357273, 'loss': 0.023463603014364665, 'time_step': 0.0037090341712153235, 'init_value': -2.004904270172119, 'ave_value': -1.9439982322086622, 'soft_opc': nan} step=688




2022-04-20 18:13.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.58 [info     ] FQE_20220420181353: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001613012579984443, 'time_algorithm_update': 0.0035593218581621037, 'loss': 0.027355937850336697, 'time_step': 0.003791595614233682, 'init_value': -3.165099859237671, 'ave_value': -3.0570128593224664, 'soft_opc': nan} step=1032




2022-04-20 18:13.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.59 [info     ] FQE_20220420181353: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016138026880663494, 'time_algorithm_update': 0.0034717259018920187, 'loss': 0.03159766540007103, 'time_step': 0.0037058647288832555, 'init_value': -3.977611541748047, 'ave_value': -3.8368408273871952, 'soft_opc': nan} step=1376




2022-04-20 18:13.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.01 [info     ] FQE_20220420181353: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001645677311475887, 'time_algorithm_update': 0.0041819596013357475, 'loss': 0.03987255240898839, 'time_step': 0.004422496224558631, 'init_value': -4.969561576843262, 'ave_value': -4.799383029006086, 'soft_opc': nan} step=1720




2022-04-20 18:14.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.03 [info     ] FQE_20220420181353: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001716953377391017, 'time_algorithm_update': 0.005180620869924856, 'loss': 0.048711511149439356, 'time_step': 0.005430500867754914, 'init_value': -5.918826580047607, 'ave_value': -5.726937215419503, 'soft_opc': nan} step=2064




2022-04-20 18:14.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.05 [info     ] FQE_20220420181353: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.000170437402503435, 'time_algorithm_update': 0.0050734787486320316, 'loss': 0.06221684331864898, 'time_step': 0.0053209752537483395, 'init_value': -7.021852493286133, 'ave_value': -6.792228944156621, 'soft_opc': nan} step=2408




2022-04-20 18:14.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.07 [info     ] FQE_20220420181353: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001739526903906534, 'time_algorithm_update': 0.005132657150889552, 'loss': 0.07405210056526283, 'time_step': 0.005383407653764237, 'init_value': -7.912992000579834, 'ave_value': -7.661415323078096, 'soft_opc': nan} step=2752




2022-04-20 18:14.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.08 [info     ] FQE_20220420181353: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001708629519440407, 'time_algorithm_update': 0.004736885774967282, 'loss': 0.08646492204942935, 'time_step': 0.0049830460271169975, 'init_value': -8.582466125488281, 'ave_value': -8.306921706731256, 'soft_opc': nan} step=3096




2022-04-20 18:14.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.10 [info     ] FQE_20220420181353: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001703479955362719, 'time_algorithm_update': 0.005065667767857396, 'loss': 0.10380883225737962, 'time_step': 0.005309724530508352, 'init_value': -9.659210205078125, 'ave_value': -9.361732267501118, 'soft_opc': nan} step=3440




2022-04-20 18:14.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.12 [info     ] FQE_20220420181353: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016904639643292095, 'time_algorithm_update': 0.005028487637985584, 'loss': 0.12274966622583655, 'time_step': 0.005272311526675557, 'init_value': -10.38602066040039, 'ave_value': -10.101832658562575, 'soft_opc': nan} step=3784




2022-04-20 18:14.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.14 [info     ] FQE_20220420181353: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017499507859695788, 'time_algorithm_update': 0.0049952233946600625, 'loss': 0.1416315998270198, 'time_step': 0.005247033612672673, 'init_value': -11.052328109741211, 'ave_value': -10.683880704976954, 'soft_opc': nan} step=4128




2022-04-20 18:14.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.16 [info     ] FQE_20220420181353: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017189286475957825, 'time_algorithm_update': 0.00503876805305481, 'loss': 0.16034996869898033, 'time_step': 0.005285306725391122, 'init_value': -12.000633239746094, 'ave_value': -11.596532090625784, 'soft_opc': nan} step=4472




2022-04-20 18:14.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.18 [info     ] FQE_20220420181353: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001715075138003327, 'time_algorithm_update': 0.004631020301996276, 'loss': 0.18153917097464897, 'time_step': 0.00487724223802256, 'init_value': -12.697956085205078, 'ave_value': -12.29262143005659, 'soft_opc': nan} step=4816




2022-04-20 18:14.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.20 [info     ] FQE_20220420181353: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017227197802344033, 'time_algorithm_update': 0.005006221837775652, 'loss': 0.19914147357976195, 'time_step': 0.005253267149592555, 'init_value': -13.329963684082031, 'ave_value': -12.843915160510454, 'soft_opc': nan} step=5160




2022-04-20 18:14.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.22 [info     ] FQE_20220420181353: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001736609048621599, 'time_algorithm_update': 0.00511084046474723, 'loss': 0.22420519516196882, 'time_step': 0.005360396795494612, 'init_value': -14.326169967651367, 'ave_value': -13.816049154733753, 'soft_opc': nan} step=5504




2022-04-20 18:14.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.24 [info     ] FQE_20220420181353: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.000173069016877995, 'time_algorithm_update': 0.005045584467954414, 'loss': 0.25451355808154613, 'time_step': 0.005293660385664119, 'init_value': -14.893678665161133, 'ave_value': -14.267074558622129, 'soft_opc': nan} step=5848




2022-04-20 18:14.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.26 [info     ] FQE_20220420181353: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017076037650884583, 'time_algorithm_update': 0.004853216714637224, 'loss': 0.2766429989674497, 'time_step': 0.005099507958389992, 'init_value': -15.520661354064941, 'ave_value': -14.901236724598451, 'soft_opc': nan} step=6192




2022-04-20 18:14.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.28 [info     ] FQE_20220420181353: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017670628636382347, 'time_algorithm_update': 0.005001322474590567, 'loss': 0.3065916841015826, 'time_step': 0.005254367756289105, 'init_value': -16.504440307617188, 'ave_value': -15.869457342697157, 'soft_opc': nan} step=6536




2022-04-20 18:14.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.30 [info     ] FQE_20220420181353: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001760139021762582, 'time_algorithm_update': 0.005088597536087036, 'loss': 0.32781104864769206, 'time_step': 0.0053420538126036175, 'init_value': -17.096908569335938, 'ave_value': -16.49303028323092, 'soft_opc': nan} step=6880




2022-04-20 18:14.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.32 [info     ] FQE_20220420181353: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017031264859576557, 'time_algorithm_update': 0.00507026148396869, 'loss': 0.3575142839663597, 'time_step': 0.005318409481713938, 'init_value': -17.88876724243164, 'ave_value': -17.18408330727268, 'soft_opc': nan} step=7224




2022-04-20 18:14.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.34 [info     ] FQE_20220420181353: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017353753710902013, 'time_algorithm_update': 0.005014144403989925, 'loss': 0.38906360827494674, 'time_step': 0.005263654298560564, 'init_value': -18.43564796447754, 'ave_value': -17.703327318193676, 'soft_opc': nan} step=7568




2022-04-20 18:14.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.36 [info     ] FQE_20220420181353: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016987947530524674, 'time_algorithm_update': 0.00460491000219833, 'loss': 0.42293576180913245, 'time_step': 0.004852570073549138, 'init_value': -19.155210494995117, 'ave_value': -18.27076665670485, 'soft_opc': nan} step=7912




2022-04-20 18:14.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.38 [info     ] FQE_20220420181353: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017034175784088845, 'time_algorithm_update': 0.005125634892042293, 'loss': 0.4576221560777793, 'time_step': 0.005375634792239167, 'init_value': -19.78710174560547, 'ave_value': -18.83763194911115, 'soft_opc': nan} step=8256




2022-04-20 18:14.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.40 [info     ] FQE_20220420181353: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001725609912428745, 'time_algorithm_update': 0.005114884570587513, 'loss': 0.47145097390856855, 'time_step': 0.005363494157791138, 'init_value': -19.92913055419922, 'ave_value': -18.99807960152089, 'soft_opc': nan} step=8600




2022-04-20 18:14.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.42 [info     ] FQE_20220420181353: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001693146173344102, 'time_algorithm_update': 0.005090908948765244, 'loss': 0.4962675419614412, 'time_step': 0.005336928506230199, 'init_value': -20.77180290222168, 'ave_value': -19.75250255752791, 'soft_opc': nan} step=8944




2022-04-20 18:14.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.43 [info     ] FQE_20220420181353: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017114642054535622, 'time_algorithm_update': 0.004894548377325368, 'loss': 0.5261097888050731, 'time_step': 0.005145846411239269, 'init_value': -21.008895874023438, 'ave_value': -20.058837372245822, 'soft_opc': nan} step=9288




2022-04-20 18:14.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.45 [info     ] FQE_20220420181353: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017495695934739223, 'time_algorithm_update': 0.005001038312911987, 'loss': 0.5594934020462164, 'time_step': 0.005251916342003401, 'init_value': -21.987232208251953, 'ave_value': -20.840208579143425, 'soft_opc': nan} step=9632




2022-04-20 18:14.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.47 [info     ] FQE_20220420181353: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016976442447928496, 'time_algorithm_update': 0.005113278710564902, 'loss': 0.5918189697888095, 'time_step': 0.005358953808629235, 'init_value': -22.006877899169922, 'ave_value': -20.829422184465837, 'soft_opc': nan} step=9976




2022-04-20 18:14.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.49 [info     ] FQE_20220420181353: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001735825871312341, 'time_algorithm_update': 0.005112950191941372, 'loss': 0.6177898256736266, 'time_step': 0.005367746879888135, 'init_value': -21.809322357177734, 'ave_value': -20.71674036262515, 'soft_opc': nan} step=10320




2022-04-20 18:14.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.51 [info     ] FQE_20220420181353: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001709343389023182, 'time_algorithm_update': 0.005059412745542304, 'loss': 0.6294082251355745, 'time_step': 0.005306355481924012, 'init_value': -22.481101989746094, 'ave_value': -21.3841526416617, 'soft_opc': nan} step=10664




2022-04-20 18:14.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.53 [info     ] FQE_20220420181353: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017059958258340525, 'time_algorithm_update': 0.004649555267289628, 'loss': 0.6650218396815796, 'time_step': 0.0048963344374368355, 'init_value': -22.680213928222656, 'ave_value': -21.438366595237422, 'soft_opc': nan} step=11008




2022-04-20 18:14.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.55 [info     ] FQE_20220420181353: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017225673032361408, 'time_algorithm_update': 0.005159513201824454, 'loss': 0.6869940184230028, 'time_step': 0.005409217851106511, 'init_value': -23.240703582763672, 'ave_value': -21.912965004823196, 'soft_opc': nan} step=11352




2022-04-20 18:14.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.57 [info     ] FQE_20220420181353: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017235029575436614, 'time_algorithm_update': 0.005079550105471944, 'loss': 0.7168602114435025, 'time_step': 0.005329712878826053, 'init_value': -23.5396785736084, 'ave_value': -22.262321455684397, 'soft_opc': nan} step=11696




2022-04-20 18:14.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.59 [info     ] FQE_20220420181353: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017009363618007925, 'time_algorithm_update': 0.005049288272857666, 'loss': 0.7420667817729504, 'time_step': 0.005295742389767669, 'init_value': -23.92824935913086, 'ave_value': -22.52466360610482, 'soft_opc': nan} step=12040




2022-04-20 18:14.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.01 [info     ] FQE_20220420181353: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001731688200041305, 'time_algorithm_update': 0.004956407602443252, 'loss': 0.7738968573380695, 'time_step': 0.005207169194554173, 'init_value': -24.414703369140625, 'ave_value': -22.90744277819538, 'soft_opc': nan} step=12384




2022-04-20 18:15.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.03 [info     ] FQE_20220420181353: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016685349996699842, 'time_algorithm_update': 0.004803626343261364, 'loss': 0.8084681811959071, 'time_step': 0.0050455373387004055, 'init_value': -25.59858512878418, 'ave_value': -24.110307749312135, 'soft_opc': nan} step=12728




2022-04-20 18:15.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.05 [info     ] FQE_20220420181353: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017068205877792005, 'time_algorithm_update': 0.0051047524740529615, 'loss': 0.8514788731889322, 'time_step': 0.005352541457775028, 'init_value': -26.0943660736084, 'ave_value': -24.660878750462892, 'soft_opc': nan} step=13072




2022-04-20 18:15.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.07 [info     ] FQE_20220420181353: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016860282698342966, 'time_algorithm_update': 0.005001060491384462, 'loss': 0.8829206934112103, 'time_step': 0.005245161610980367, 'init_value': -26.40675163269043, 'ave_value': -24.91493386677005, 'soft_opc': nan} step=13416




2022-04-20 18:15.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.09 [info     ] FQE_20220420181353: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017096275507017623, 'time_algorithm_update': 0.005013544892155847, 'loss': 0.9069609035802788, 'time_step': 0.005261800316877143, 'init_value': -27.008533477783203, 'ave_value': -25.465220673455278, 'soft_opc': nan} step=13760




2022-04-20 18:15.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.11 [info     ] FQE_20220420181353: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017068275185518487, 'time_algorithm_update': 0.004565422618111899, 'loss': 0.9427279347748777, 'time_step': 0.004811444947885913, 'init_value': -27.46645164489746, 'ave_value': -25.942885993864085, 'soft_opc': nan} step=14104




2022-04-20 18:15.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.13 [info     ] FQE_20220420181353: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016914342724999717, 'time_algorithm_update': 0.005085509183794953, 'loss': 0.9640043325029141, 'time_step': 0.0053289595038391825, 'init_value': -28.046707153320312, 'ave_value': -26.61681536339215, 'soft_opc': nan} step=14448




2022-04-20 18:15.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.15 [info     ] FQE_20220420181353: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017090800196625466, 'time_algorithm_update': 0.005102452843688255, 'loss': 0.9914639751253607, 'time_step': 0.005348796068235885, 'init_value': -28.279165267944336, 'ave_value': -26.705892592337957, 'soft_opc': nan} step=14792




2022-04-20 18:15.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.17 [info     ] FQE_20220420181353: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001703958178675452, 'time_algorithm_update': 0.005000534445740456, 'loss': 1.0211784578751513, 'time_step': 0.00524624696997709, 'init_value': -28.813678741455078, 'ave_value': -27.38967241627156, 'soft_opc': nan} step=15136




2022-04-20 18:15.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.19 [info     ] FQE_20220420181353: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017001254614009412, 'time_algorithm_update': 0.00491677813751753, 'loss': 1.046698717476237, 'time_step': 0.005162408878636914, 'init_value': -29.100479125976562, 'ave_value': -27.58121666551299, 'soft_opc': nan} step=15480




2022-04-20 18:15.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.20 [info     ] FQE_20220420181353: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017006175462589709, 'time_algorithm_update': 0.0048187922599703765, 'loss': 1.063001512560647, 'time_step': 0.005068422749985096, 'init_value': -29.602407455444336, 'ave_value': -28.238482431692347, 'soft_opc': nan} step=15824




2022-04-20 18:15.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.22 [info     ] FQE_20220420181353: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017250069352083429, 'time_algorithm_update': 0.005035209794377171, 'loss': 1.0877170137801142, 'time_step': 0.005284528399622718, 'init_value': -29.567874908447266, 'ave_value': -28.329894589472495, 'soft_opc': nan} step=16168




2022-04-20 18:15.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.24 [info     ] FQE_20220420181353: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001732672369757364, 'time_algorithm_update': 0.005011783089748648, 'loss': 1.0874891844902967, 'time_step': 0.005259300387182901, 'init_value': -29.69156837463379, 'ave_value': -28.506758997609065, 'soft_opc': nan} step=16512




2022-04-20 18:15.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.26 [info     ] FQE_20220420181353: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017114711362262104, 'time_algorithm_update': 0.005039503408032794, 'loss': 1.1038601943724897, 'time_step': 0.0052855278170386025, 'init_value': -29.35226058959961, 'ave_value': -28.068935165259784, 'soft_opc': nan} step=16856




2022-04-20 18:15.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:15.28 [info     ] FQE_20220420181353: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016836371532706328, 'time_algorithm_update': 0.004555802012598792, 'loss': 1.1314174953068412, 'time_step': 0.004797803801159526, 'init_value': -30.215930938720703, 'ave_value': -28.993198881304178, 'soft_opc': nan} step=17200




2022-04-20 18:15.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181353/model_17200.pt
search iteration:  33
using hyper params:  [0.002659254513205722, 0.003941647591636307, 6.472771232547425e-05, 3]
2022-04-20 18:15.28 [debug    ] RoundIterator is selected.
2022-04-20 18:15.28 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420181528
2022-04-20 18:15.28 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:15.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:15.28 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:15.28 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00265925451320

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.32 [info     ] TD3PlusBC_20220420181528: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003537211501807497, 'time_algorithm_update': 0.009035217831706443, 'critic_loss': 3.312162647906103, 'actor_loss': 2.436693655817132, 'time_step': 0.009473419328879195, 'td_error': 0.8296107276959227, 'init_value': -4.444615364074707, 'ave_value': -2.5187658987568087} step=342
2022-04-20 18:15.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.36 [info     ] TD3PlusBC_20220420181528: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00034814829017683775, 'time_algorithm_update': 0.00884356345349585, 'critic_loss': 1.172550273917572, 'actor_loss': 2.3199977052142047, 'time_step': 0.009266569600467794, 'td_error': 0.8646880696113481, 'init_value': -6.203372955322266, 'ave_value': -3.523501958297901} step=684
2022-04-20 18:15.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.39 [info     ] TD3PlusBC_20220420181528: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00034911242144846776, 'time_algorithm_update': 0.008506772810952705, 'critic_loss': 1.7191276574692531, 'actor_loss': 2.301501699358399, 'time_step': 0.008928547825729638, 'td_error': 0.9368332821391704, 'init_value': -8.444809913635254, 'ave_value': -4.837868480775715} step=1026
2022-04-20 18:15.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.43 [info     ] TD3PlusBC_20220420181528: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003243405916537458, 'time_algorithm_update': 0.00855665457876105, 'critic_loss': 2.4021352108633307, 'actor_loss': 2.3027664778525367, 'time_step': 0.00895249495032238, 'td_error': 1.0350341280251647, 'init_value': -10.602294921875, 'ave_value': -6.059891870005837} step=1368
2022-04-20 18:15.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.46 [info     ] TD3PlusBC_20220420181528: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003087429972419962, 'time_algorithm_update': 0.007912672054000765, 'critic_loss': 3.075226194328732, 'actor_loss': 2.2872224397826613, 'time_step': 0.008287106341088724, 'td_error': 1.1582925241116122, 'init_value': -12.7571439743042, 'ave_value': -7.284916076665317} step=1710
2022-04-20 18:15.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.50 [info     ] TD3PlusBC_20220420181528: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003445964110525031, 'time_algorithm_update': 0.008866206944337365, 'critic_loss': 3.9532843165927463, 'actor_loss': 2.29430792066786, 'time_step': 0.009289344848945127, 'td_error': 1.3120566892117496, 'init_value': -14.784215927124023, 'ave_value': -8.46492228440997} step=2052
2022-04-20 18:15.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.54 [info     ] TD3PlusBC_20220420181528: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003277384049711172, 'time_algorithm_update': 0.008484475096763923, 'critic_loss': 4.884696341397469, 'actor_loss': 2.2935439006626956, 'time_step': 0.008886530385379903, 'td_error': 1.4923883489727103, 'init_value': -17.191307067871094, 'ave_value': -9.821328178801457} step=2394
2022-04-20 18:15.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.57 [info     ] TD3PlusBC_20220420181528: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00031354761960213643, 'time_algorithm_update': 0.008058227293672617, 'critic_loss': 6.055122457401097, 'actor_loss': 2.2963532495219807, 'time_step': 0.008439591753552531, 'td_error': 1.6330610241394534, 'init_value': -18.872955322265625, 'ave_value': -10.743183970654442} step=2736
2022-04-20 18:15.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.01 [info     ] TD3PlusBC_20220420181528: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003558780714782358, 'time_algorithm_update': 0.008990158811647293, 'critic_loss': 7.303598868219476, 'actor_loss': 2.2940420853464225, 'time_step': 0.009426510822006136, 'td_error': 1.8013705574940044, 'init_value': -20.768774032592773, 'ave_value': -11.875354297611914} step=3078
2022-04-20 18:16.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.04 [info     ] TD3PlusBC_20220420181528: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00034955300782856186, 'time_algorithm_update': 0.008417659335666232, 'critic_loss': 8.754797068952817, 'actor_loss': 2.2922249816314517, 'time_step': 0.008847070716277898, 'td_error': 2.032304252056942, 'init_value': -23.19902992248535, 'ave_value': -13.392372900124153} step=3420
2022-04-20 18:16.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.08 [info     ] TD3PlusBC_20220420181528: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003522460223638523, 'time_algorithm_update': 0.009011847233911704, 'critic_loss': 10.468373850772256, 'actor_loss': 2.2974396253886975, 'time_step': 0.009436272738272683, 'td_error': 2.191348492302174, 'init_value': -25.422067642211914, 'ave_value': -14.453637438696214} step=3762
2022-04-20 18:16.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.12 [info     ] TD3PlusBC_20220420181528: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00035298916331508704, 'time_algorithm_update': 0.008867247062816955, 'critic_loss': 11.97654748520656, 'actor_loss': 2.2953975075169613, 'time_step': 0.009294564960992823, 'td_error': 2.3787603504695913, 'init_value': -27.583911895751953, 'ave_value': -15.61320370157803} step=4104
2022-04-20 18:16.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.15 [info     ] TD3PlusBC_20220420181528: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.000347148605257447, 'time_algorithm_update': 0.008477841901500322, 'critic_loss': 13.867445367121556, 'actor_loss': 2.300082750487746, 'time_step': 0.008894661713761893, 'td_error': 2.6315705641760907, 'init_value': -29.006328582763672, 'ave_value': -16.645011689329923} step=4446
2022-04-20 18:16.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.19 [info     ] TD3PlusBC_20220420181528: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035160675383450694, 'time_algorithm_update': 0.0088118314743042, 'critic_loss': 15.699602711270428, 'actor_loss': 2.294491330085442, 'time_step': 0.009235341646517927, 'td_error': 2.798881669724056, 'init_value': -30.77882957458496, 'ave_value': -17.519853707278013} step=4788
2022-04-20 18:16.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.23 [info     ] TD3PlusBC_20220420181528: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00034975517562955445, 'time_algorithm_update': 0.008577317522283186, 'critic_loss': 18.016569764990557, 'actor_loss': 2.2939173798812065, 'time_step': 0.009000161237883986, 'td_error': 2.9626481705320233, 'init_value': -32.618465423583984, 'ave_value': -18.6253412785754} step=5130
2022-04-20 18:16.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.26 [info     ] TD3PlusBC_20220420181528: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035231155261658786, 'time_algorithm_update': 0.008887731540969938, 'critic_loss': 20.15646471057022, 'actor_loss': 2.2959572850612173, 'time_step': 0.009309349004288165, 'td_error': 3.145849931420801, 'init_value': -34.47441482543945, 'ave_value': -19.533628411695513} step=5472
2022-04-20 18:16.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.30 [info     ] TD3PlusBC_20220420181528: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003464640232554653, 'time_algorithm_update': 0.00879655665124369, 'critic_loss': 22.727966244457757, 'actor_loss': 2.294792210149486, 'time_step': 0.009216180321765922, 'td_error': 3.4143112944199774, 'init_value': -36.03015899658203, 'ave_value': -20.494126643091104} step=5814
2022-04-20 18:16.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.33 [info     ] TD3PlusBC_20220420181528: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003488572717410082, 'time_algorithm_update': 0.008447505577265868, 'critic_loss': 25.157540299041926, 'actor_loss': 2.2940431882066337, 'time_step': 0.008872480420341269, 'td_error': 3.6246529836938133, 'init_value': -37.9490852355957, 'ave_value': -21.571758073903034} step=6156
2022-04-20 18:16.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.37 [info     ] TD3PlusBC_20220420181528: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003514868474146079, 'time_algorithm_update': 0.008898235901057372, 'critic_loss': 27.28268782018918, 'actor_loss': 2.2939939373417904, 'time_step': 0.009322645371420342, 'td_error': 3.706810164098574, 'init_value': -39.233642578125, 'ave_value': -22.495556376095283} step=6498
2022-04-20 18:16.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.41 [info     ] TD3PlusBC_20220420181528: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035188909162554827, 'time_algorithm_update': 0.008724777322066458, 'critic_loss': 30.011952974642927, 'actor_loss': 2.295205335170902, 'time_step': 0.009150499488875183, 'td_error': 4.030899006318691, 'init_value': -41.59211349487305, 'ave_value': -23.438515180359587} step=6840
2022-04-20 18:16.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.44 [info     ] TD3PlusBC_20220420181528: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035042163224248163, 'time_algorithm_update': 0.008788073969166182, 'critic_loss': 32.54951682285956, 'actor_loss': 2.2910413407442864, 'time_step': 0.009212007996631645, 'td_error': 4.352313302145914, 'init_value': -43.23859786987305, 'ave_value': -24.270744364749103} step=7182
2022-04-20 18:16.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.48 [info     ] TD3PlusBC_20220420181528: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00035028638895492106, 'time_algorithm_update': 0.008838422814307854, 'critic_loss': 35.19499867422539, 'actor_loss': 2.2923354126556577, 'time_step': 0.009260314249852944, 'td_error': 4.469786337679616, 'init_value': -44.42470932006836, 'ave_value': -25.13123466438408} step=7524
2022-04-20 18:16.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.52 [info     ] TD3PlusBC_20220420181528: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00034981652310020044, 'time_algorithm_update': 0.008460840286567198, 'critic_loss': 37.77919683958355, 'actor_loss': 2.293225133628176, 'time_step': 0.0088847387603849, 'td_error': 4.72064531055104, 'init_value': -45.74850845336914, 'ave_value': -25.879274473418775} step=7866
2022-04-20 18:16.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.55 [info     ] TD3PlusBC_20220420181528: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003501992476613898, 'time_algorithm_update': 0.008892918887891267, 'critic_loss': 40.25360071728801, 'actor_loss': 2.295315640711645, 'time_step': 0.009320536552116884, 'td_error': 4.84170180427172, 'init_value': -46.461021423339844, 'ave_value': -26.605277547100393} step=8208
2022-04-20 18:16.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.59 [info     ] TD3PlusBC_20220420181528: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003569865087319536, 'time_algorithm_update': 0.00891502140558254, 'critic_loss': 42.74770182336283, 'actor_loss': 2.2917639944288464, 'time_step': 0.009347145320379247, 'td_error': 5.106251451515123, 'init_value': -48.42207717895508, 'ave_value': -27.432281595688725} step=8550
2022-04-20 18:16.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.03 [info     ] TD3PlusBC_20220420181528: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003486669551559359, 'time_algorithm_update': 0.008629292075396979, 'critic_loss': 45.31363672401473, 'actor_loss': 2.2927126814747414, 'time_step': 0.009048114743149071, 'td_error': 5.360456469554677, 'init_value': -49.20684814453125, 'ave_value': -28.216119641707873} step=8892
2022-04-20 18:17.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.07 [info     ] TD3PlusBC_20220420181528: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003495613733927409, 'time_algorithm_update': 0.008884837055764002, 'critic_loss': 47.968281349940604, 'actor_loss': 2.299258876265141, 'time_step': 0.009309917862652338, 'td_error': 5.565373835715243, 'init_value': -50.905860900878906, 'ave_value': -29.20503952154445} step=9234
2022-04-20 18:17.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.10 [info     ] TD3PlusBC_20220420181528: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003511578018902338, 'time_algorithm_update': 0.008389339809529266, 'critic_loss': 50.24754651387533, 'actor_loss': 2.2958550913292064, 'time_step': 0.008814466627020585, 'td_error': 5.500513727323464, 'init_value': -50.557151794433594, 'ave_value': -29.519305314396064} step=9576
2022-04-20 18:17.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.14 [info     ] TD3PlusBC_20220420181528: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00035206546560365555, 'time_algorithm_update': 0.009006498849879929, 'critic_loss': 52.6592783732721, 'actor_loss': 2.2966896567428323, 'time_step': 0.009434960041826928, 'td_error': 5.70959920550359, 'init_value': -52.17267990112305, 'ave_value': -30.31020820221251} step=9918
2022-04-20 18:17.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.18 [info     ] TD3PlusBC_20220420181528: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00035145617367928487, 'time_algorithm_update': 0.008823351553309034, 'critic_loss': 55.19150050202308, 'actor_loss': 2.2944607037549827, 'time_step': 0.009249756210728696, 'td_error': 5.974296355765116, 'init_value': -53.12068557739258, 'ave_value': -31.053007480550793} step=10260
2022-04-20 18:17.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.21 [info     ] TD3PlusBC_20220420181528: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035648318062051696, 'time_algorithm_update': 0.008388609914054648, 'critic_loss': 57.631763776143394, 'actor_loss': 2.299470423257839, 'time_step': 0.008817791938781738, 'td_error': 6.243769573371245, 'init_value': -54.709373474121094, 'ave_value': -31.820305467463175} step=10602
2022-04-20 18:17.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.25 [info     ] TD3PlusBC_20220420181528: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035191976536087127, 'time_algorithm_update': 0.008913118936862165, 'critic_loss': 60.012260013156464, 'actor_loss': 2.2969904712766236, 'time_step': 0.00934442302636933, 'td_error': 6.466307840615221, 'init_value': -56.10173416137695, 'ave_value': -32.61761623600456} step=10944
2022-04-20 18:17.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.28 [info     ] TD3PlusBC_20220420181528: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00034954464226438287, 'time_algorithm_update': 0.00834841477243524, 'critic_loss': 62.49242600781179, 'actor_loss': 2.2993705244789346, 'time_step': 0.00877514986963997, 'td_error': 6.594424903692254, 'init_value': -56.47278594970703, 'ave_value': -33.1230371084368} step=11286
2022-04-20 18:17.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.32 [info     ] TD3PlusBC_20220420181528: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003479098715977362, 'time_algorithm_update': 0.008871969423796, 'critic_loss': 64.84850617458946, 'actor_loss': 2.2956848367612963, 'time_step': 0.009291101617422717, 'td_error': 6.7343573260025895, 'init_value': -56.720436096191406, 'ave_value': -33.581245931405576} step=11628
2022-04-20 18:17.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.36 [info     ] TD3PlusBC_20220420181528: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003535949695877164, 'time_algorithm_update': 0.008794795002853661, 'critic_loss': 67.3621471360413, 'actor_loss': 2.2971521776322037, 'time_step': 0.009222905538235491, 'td_error': 6.873025099856169, 'init_value': -58.2933464050293, 'ave_value': -34.353301378602815} step=11970
2022-04-20 18:17.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.39 [info     ] TD3PlusBC_20220420181528: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003441607045848467, 'time_algorithm_update': 0.008358664679945562, 'critic_loss': 69.77321652641074, 'actor_loss': 2.299131067175614, 'time_step': 0.008777614922551384, 'td_error': 7.011438846672562, 'init_value': -59.487342834472656, 'ave_value': -35.062095107144366} step=12312
2022-04-20 18:17.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.43 [info     ] TD3PlusBC_20220420181528: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035321503354792007, 'time_algorithm_update': 0.008915429923966614, 'critic_loss': 72.20851004751106, 'actor_loss': 2.294341080370005, 'time_step': 0.009344531081573309, 'td_error': 7.067211490604997, 'init_value': -59.906494140625, 'ave_value': -35.577075270507066} step=12654
2022-04-20 18:17.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.47 [info     ] TD3PlusBC_20220420181528: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003560049492016173, 'time_algorithm_update': 0.008576085692957827, 'critic_loss': 74.5777744047823, 'actor_loss': 2.2985482926954304, 'time_step': 0.009007392570986384, 'td_error': 7.421294021630231, 'init_value': -61.0423698425293, 'ave_value': -36.26649515554636} step=12996
2022-04-20 18:17.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.50 [info     ] TD3PlusBC_20220420181528: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035163603330913345, 'time_algorithm_update': 0.008912572386669136, 'critic_loss': 76.84113543772558, 'actor_loss': 2.2986903581005786, 'time_step': 0.00934045844607883, 'td_error': 7.450400811496415, 'init_value': -61.838706970214844, 'ave_value': -36.72465149583639} step=13338
2022-04-20 18:17.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.54 [info     ] TD3PlusBC_20220420181528: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.000354883963601631, 'time_algorithm_update': 0.00876383126130578, 'critic_loss': 79.15793392114472, 'actor_loss': 2.298897213406033, 'time_step': 0.009194270909181115, 'td_error': 7.407403063670552, 'init_value': -61.40301513671875, 'ave_value': -36.95967234154668} step=13680
2022-04-20 18:17.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.58 [info     ] TD3PlusBC_20220420181528: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003530567849588673, 'time_algorithm_update': 0.008458209316632902, 'critic_loss': 80.99804733231751, 'actor_loss': 2.2990347446753963, 'time_step': 0.008886303818016722, 'td_error': 7.883707833816555, 'init_value': -63.6967658996582, 'ave_value': -37.743528371345135} step=14022
2022-04-20 18:17.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.01 [info     ] TD3PlusBC_20220420181528: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00035459744302850023, 'time_algorithm_update': 0.008894198819210655, 'critic_loss': 83.41160986158583, 'actor_loss': 2.300600974880464, 'time_step': 0.009326142874377512, 'td_error': 7.796003769500512, 'init_value': -63.016944885253906, 'ave_value': -38.131902081255994} step=14364
2022-04-20 18:18.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.05 [info     ] TD3PlusBC_20220420181528: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003536061236732884, 'time_algorithm_update': 0.008917022169682017, 'critic_loss': 85.23528940078111, 'actor_loss': 2.2961332142701623, 'time_step': 0.009349087525529471, 'td_error': 7.945098304751423, 'init_value': -62.9920654296875, 'ave_value': -38.48184284606143} step=14706
2022-04-20 18:18.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.08 [info     ] TD3PlusBC_20220420181528: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003492155967400088, 'time_algorithm_update': 0.008700701925489638, 'critic_loss': 87.2729937570137, 'actor_loss': 2.300186363577146, 'time_step': 0.00912364881638198, 'td_error': 8.268936506624364, 'init_value': -64.81770324707031, 'ave_value': -39.29812213439503} step=15048
2022-04-20 18:18.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.12 [info     ] TD3PlusBC_20220420181528: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003537392755698042, 'time_algorithm_update': 0.008842185226797361, 'critic_loss': 89.463983848081, 'actor_loss': 2.3010615758728563, 'time_step': 0.00927139165108664, 'td_error': 8.26551722745945, 'init_value': -65.25688934326172, 'ave_value': -39.6238519955929} step=15390
2022-04-20 18:18.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.16 [info     ] TD3PlusBC_20220420181528: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003482556482504683, 'time_algorithm_update': 0.008448477376971329, 'critic_loss': 90.95858141135054, 'actor_loss': 2.300339546817088, 'time_step': 0.00887417305282682, 'td_error': 8.449388185842347, 'init_value': -66.4930191040039, 'ave_value': -40.07644801765059} step=15732
2022-04-20 18:18.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.19 [info     ] TD3PlusBC_20220420181528: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003527096140454387, 'time_algorithm_update': 0.00889442817509523, 'critic_loss': 92.94434461537857, 'actor_loss': 2.3015251271208825, 'time_step': 0.00932568625399941, 'td_error': 8.661404130121571, 'init_value': -66.3094711303711, 'ave_value': -40.45788224659122} step=16074
2022-04-20 18:18.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.23 [info     ] TD3PlusBC_20220420181528: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.000350968879565858, 'time_algorithm_update': 0.008773248098049944, 'critic_loss': 94.89250972814727, 'actor_loss': 2.303629027472602, 'time_step': 0.009202303245053655, 'td_error': 8.780973370298584, 'init_value': -66.57084655761719, 'ave_value': -40.998328614638496} step=16416
2022-04-20 18:18.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.27 [info     ] TD3PlusBC_20220420181528: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00035203479186833255, 'time_algorithm_update': 0.00850411744145622, 'critic_loss': 96.73700253447593, 'actor_loss': 2.302915409991616, 'time_step': 0.008932358340213173, 'td_error': 8.642133218059827, 'init_value': -67.18331146240234, 'ave_value': -41.576990460737164} step=16758
2022-04-20 18:18.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.30 [info     ] TD3PlusBC_20220420181528: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003523694144354926, 'time_algorithm_update': 0.008807822277671412, 'critic_loss': 98.98896562007435, 'actor_loss': 2.306146136501379, 'time_step': 0.00923659508688408, 'td_error': 8.884862183154972, 'init_value': -67.148193359375, 'ave_value': -41.57321628499172} step=17100
2022-04-20 18:18.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420181528/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.800

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:18.32 [info     ] FQE_20220420181831: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015745459303344038, 'time_algorithm_update': 0.005055748136703578, 'loss': 0.008045516758210074, 'time_step': 0.005288586104657017, 'init_value': 0.006364563945680857, 'ave_value': 0.0499851488248185, 'soft_opc': nan} step=177




2022-04-20 18:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.32 [info     ] FQE_20220420181831: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001565157356908766, 'time_algorithm_update': 0.004623931680021986, 'loss': 0.006060958674209097, 'time_step': 0.00484926821821827, 'init_value': -0.15820640325546265, 'ave_value': -0.04974521656741609, 'soft_opc': nan} step=354




2022-04-20 18:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.33 [info     ] FQE_20220420181831: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015552703943629722, 'time_algorithm_update': 0.004572814467263087, 'loss': 0.005084818102798219, 'time_step': 0.004797450566695908, 'init_value': -0.2335323542356491, 'ave_value': -0.05867208502291738, 'soft_opc': nan} step=531




2022-04-20 18:18.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.34 [info     ] FQE_20220420181831: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015912487008477334, 'time_algorithm_update': 0.005114319634302861, 'loss': 0.004492492199280841, 'time_step': 0.005346071922172934, 'init_value': -0.2829517424106598, 'ave_value': -0.06865866119349683, 'soft_opc': nan} step=708




2022-04-20 18:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.35 [info     ] FQE_20220420181831: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.0001593794526353394, 'time_algorithm_update': 0.00503104824130818, 'loss': 0.0040958776378130675, 'time_step': 0.005261924980723925, 'init_value': -0.34692808985710144, 'ave_value': -0.09296581266453495, 'soft_opc': nan} step=885




2022-04-20 18:18.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.36 [info     ] FQE_20220420181831: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016014858827752583, 'time_algorithm_update': 0.0050551514167570125, 'loss': 0.003870226658543961, 'time_step': 0.005281717763782221, 'init_value': -0.3780066967010498, 'ave_value': -0.10531095147535607, 'soft_opc': nan} step=1062




2022-04-20 18:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.37 [info     ] FQE_20220420181831: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001646812352756996, 'time_algorithm_update': 0.004996035732118423, 'loss': 0.0037519672851336035, 'time_step': 0.005231365645672642, 'init_value': -0.4726966917514801, 'ave_value': -0.1586789371171692, 'soft_opc': nan} step=1239




2022-04-20 18:18.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.38 [info     ] FQE_20220420181831: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016167204258805615, 'time_algorithm_update': 0.005028769121331684, 'loss': 0.0036415290133501427, 'time_step': 0.005267704947520111, 'init_value': -0.5216888785362244, 'ave_value': -0.16913702495776498, 'soft_opc': nan} step=1416




2022-04-20 18:18.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.39 [info     ] FQE_20220420181831: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016309986006742143, 'time_algorithm_update': 0.005063388307215803, 'loss': 0.003453310511316999, 'time_step': 0.005298598337981661, 'init_value': -0.6000251173973083, 'ave_value': -0.2080242822746615, 'soft_opc': nan} step=1593




2022-04-20 18:18.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.40 [info     ] FQE_20220420181831: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001639942664884578, 'time_algorithm_update': 0.0050510430740097825, 'loss': 0.003615499449115099, 'time_step': 0.0052877240261789095, 'init_value': -0.6998278498649597, 'ave_value': -0.2640301264251943, 'soft_opc': nan} step=1770




2022-04-20 18:18.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.41 [info     ] FQE_20220420181831: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015356446390098097, 'time_algorithm_update': 0.004260007944484215, 'loss': 0.0037086705695627267, 'time_step': 0.004480946535444529, 'init_value': -0.7511067986488342, 'ave_value': -0.2916117469178843, 'soft_opc': nan} step=1947




2022-04-20 18:18.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.42 [info     ] FQE_20220420181831: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015976065296237753, 'time_algorithm_update': 0.004916557484427414, 'loss': 0.0037316935075089167, 'time_step': 0.005145178002826238, 'init_value': -0.8863993287086487, 'ave_value': -0.38168480993435905, 'soft_opc': nan} step=2124




2022-04-20 18:18.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.43 [info     ] FQE_20220420181831: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016185792825989804, 'time_algorithm_update': 0.005103057387184962, 'loss': 0.004145191131306991, 'time_step': 0.0053374807713395455, 'init_value': -0.971837043762207, 'ave_value': -0.43648508882468884, 'soft_opc': nan} step=2301




2022-04-20 18:18.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.44 [info     ] FQE_20220420181831: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016227415052510923, 'time_algorithm_update': 0.005090075024103714, 'loss': 0.004434170756531132, 'time_step': 0.00532172359315689, 'init_value': -1.0733577013015747, 'ave_value': -0.4965752689665681, 'soft_opc': nan} step=2478




2022-04-20 18:18.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.45 [info     ] FQE_20220420181831: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.0001664727421130164, 'time_algorithm_update': 0.0050240896515927076, 'loss': 0.00493940742920537, 'time_step': 0.005266135695290431, 'init_value': -1.1360927820205688, 'ave_value': -0.5203508159106558, 'soft_opc': nan} step=2655




2022-04-20 18:18.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.46 [info     ] FQE_20220420181831: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016409125031724488, 'time_algorithm_update': 0.004996212188806911, 'loss': 0.005341214535621095, 'time_step': 0.00523220213119593, 'init_value': -1.2026430368423462, 'ave_value': -0.5698370855894532, 'soft_opc': nan} step=2832




2022-04-20 18:18.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.47 [info     ] FQE_20220420181831: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001617973133669061, 'time_algorithm_update': 0.005056743567946267, 'loss': 0.006131864037748826, 'time_step': 0.005290067802041264, 'init_value': -1.2528916597366333, 'ave_value': -0.5892488395107222, 'soft_opc': nan} step=3009




2022-04-20 18:18.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.48 [info     ] FQE_20220420181831: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.0001627954386048398, 'time_algorithm_update': 0.0050348467746023405, 'loss': 0.0066264346001885495, 'time_step': 0.005271479234857075, 'init_value': -1.3284529447555542, 'ave_value': -0.6421811369558176, 'soft_opc': nan} step=3186




2022-04-20 18:18.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.49 [info     ] FQE_20220420181831: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00015907907216562389, 'time_algorithm_update': 0.0049996335627669, 'loss': 0.007465998392873102, 'time_step': 0.0052335935797395, 'init_value': -1.381558895111084, 'ave_value': -0.6818122917512157, 'soft_opc': nan} step=3363




2022-04-20 18:18.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.50 [info     ] FQE_20220420181831: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00015745324603581832, 'time_algorithm_update': 0.004056440234857764, 'loss': 0.008417410315619519, 'time_step': 0.004288325875492419, 'init_value': -1.5060383081436157, 'ave_value': -0.7536042094006911, 'soft_opc': nan} step=3540




2022-04-20 18:18.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.51 [info     ] FQE_20220420181831: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016180539535263838, 'time_algorithm_update': 0.005086827412836969, 'loss': 0.008931256219676281, 'time_step': 0.005319417533227953, 'init_value': -1.6088703870773315, 'ave_value': -0.8260965183175899, 'soft_opc': nan} step=3717




2022-04-20 18:18.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.52 [info     ] FQE_20220420181831: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001685336484747418, 'time_algorithm_update': 0.0050417892003463485, 'loss': 0.009847569891704131, 'time_step': 0.005283106518330547, 'init_value': -1.6704074144363403, 'ave_value': -0.8646759749309079, 'soft_opc': nan} step=3894




2022-04-20 18:18.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.53 [info     ] FQE_20220420181831: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016183637629794536, 'time_algorithm_update': 0.004951407006904903, 'loss': 0.011225310097038114, 'time_step': 0.005185551562551725, 'init_value': -1.7936426401138306, 'ave_value': -0.9667181579838018, 'soft_opc': nan} step=4071




2022-04-20 18:18.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.54 [info     ] FQE_20220420181831: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00015996135560806187, 'time_algorithm_update': 0.004986687568621447, 'loss': 0.012096983126080516, 'time_step': 0.005218666152092023, 'init_value': -1.8875104188919067, 'ave_value': -1.0277777443653917, 'soft_opc': nan} step=4248




2022-04-20 18:18.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.55 [info     ] FQE_20220420181831: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016387573069771806, 'time_algorithm_update': 0.005049216545234292, 'loss': 0.012715168446591808, 'time_step': 0.00528678112784348, 'init_value': -1.930794596672058, 'ave_value': -1.040579471882578, 'soft_opc': nan} step=4425




2022-04-20 18:18.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.56 [info     ] FQE_20220420181831: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001600529514463608, 'time_algorithm_update': 0.005013840346686584, 'loss': 0.01356475144535261, 'time_step': 0.005245173718296202, 'init_value': -2.0148630142211914, 'ave_value': -1.1064544289386846, 'soft_opc': nan} step=4602




2022-04-20 18:18.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.57 [info     ] FQE_20220420181831: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001644859206205034, 'time_algorithm_update': 0.004998123578432589, 'loss': 0.015305910369981144, 'time_step': 0.005236081484347414, 'init_value': -2.092099189758301, 'ave_value': -1.1341970523802547, 'soft_opc': nan} step=4779




2022-04-20 18:18.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.58 [info     ] FQE_20220420181831: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016166934859281206, 'time_algorithm_update': 0.004975465731432209, 'loss': 0.01594481548546987, 'time_step': 0.005211848997126865, 'init_value': -2.3352317810058594, 'ave_value': -1.3309245570039785, 'soft_opc': nan} step=4956




2022-04-20 18:18.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:18.59 [info     ] FQE_20220420181831: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016028463503735214, 'time_algorithm_update': 0.004114658819080072, 'loss': 0.01715305624569182, 'time_step': 0.0043447004199701515, 'init_value': -2.4720394611358643, 'ave_value': -1.4546798271773098, 'soft_opc': nan} step=5133




2022-04-20 18:18.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.00 [info     ] FQE_20220420181831: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.0001701325346521065, 'time_algorithm_update': 0.005067772784475553, 'loss': 0.018408404394857966, 'time_step': 0.005313434169790839, 'init_value': -2.5084943771362305, 'ave_value': -1.4553477507677521, 'soft_opc': nan} step=5310




2022-04-20 18:19.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.01 [info     ] FQE_20220420181831: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001654288189559333, 'time_algorithm_update': 0.0050001615858347405, 'loss': 0.019822976978806157, 'time_step': 0.00524314783387265, 'init_value': -2.673534393310547, 'ave_value': -1.5660151999007474, 'soft_opc': nan} step=5487




2022-04-20 18:19.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.02 [info     ] FQE_20220420181831: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016410876128633144, 'time_algorithm_update': 0.004966202428785421, 'loss': 0.02100719430149759, 'time_step': 0.00520184888678082, 'init_value': -2.795659303665161, 'ave_value': -1.6757615626440034, 'soft_opc': nan} step=5664




2022-04-20 18:19.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.03 [info     ] FQE_20220420181831: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016979713224421787, 'time_algorithm_update': 0.005033517287949384, 'loss': 0.022433169071673083, 'time_step': 0.005278619669251523, 'init_value': -2.845975875854492, 'ave_value': -1.679283850919735, 'soft_opc': nan} step=5841




2022-04-20 18:19.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.04 [info     ] FQE_20220420181831: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00016050150165450102, 'time_algorithm_update': 0.004865321735877775, 'loss': 0.022976094228741688, 'time_step': 0.0050980977419406, 'init_value': -2.9730396270751953, 'ave_value': -1.778290175523486, 'soft_opc': nan} step=6018




2022-04-20 18:19.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.05 [info     ] FQE_20220420181831: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00015738859014996026, 'time_algorithm_update': 0.00503211641042246, 'loss': 0.022990742541118235, 'time_step': 0.005261879182804775, 'init_value': -3.003618001937866, 'ave_value': -1.7977130398601742, 'soft_opc': nan} step=6195




2022-04-20 18:19.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.06 [info     ] FQE_20220420181831: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016727420569813184, 'time_algorithm_update': 0.005052873643778138, 'loss': 0.024922696046141835, 'time_step': 0.0052940791609597075, 'init_value': -3.124504327774048, 'ave_value': -1.8686386173596612, 'soft_opc': nan} step=6372




2022-04-20 18:19.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.07 [info     ] FQE_20220420181831: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016159661072122174, 'time_algorithm_update': 0.004999665890709828, 'loss': 0.025176705369636374, 'time_step': 0.00523660681342001, 'init_value': -3.2487809658050537, 'ave_value': -1.9211830837187824, 'soft_opc': nan} step=6549




2022-04-20 18:19.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.08 [info     ] FQE_20220420181831: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016004486946062852, 'time_algorithm_update': 0.004207283763562219, 'loss': 0.02680541347320281, 'time_step': 0.004440042258655958, 'init_value': -3.3617210388183594, 'ave_value': -1.9606241683299477, 'soft_opc': nan} step=6726




2022-04-20 18:19.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.09 [info     ] FQE_20220420181831: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016429464695817334, 'time_algorithm_update': 0.004973460051972987, 'loss': 0.027253006219253532, 'time_step': 0.00521278920146705, 'init_value': -3.4093921184539795, 'ave_value': -2.027525542461657, 'soft_opc': nan} step=6903




2022-04-20 18:19.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.10 [info     ] FQE_20220420181831: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016163702064988305, 'time_algorithm_update': 0.004949181766833289, 'loss': 0.028596250246947178, 'time_step': 0.005182566615821278, 'init_value': -3.618133544921875, 'ave_value': -2.1711278687278788, 'soft_opc': nan} step=7080




2022-04-20 18:19.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.11 [info     ] FQE_20220420181831: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016355245126842779, 'time_algorithm_update': 0.005087346006921456, 'loss': 0.029831806025441505, 'time_step': 0.00532336558325816, 'init_value': -3.7079241275787354, 'ave_value': -2.2221905191336666, 'soft_opc': nan} step=7257




2022-04-20 18:19.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.12 [info     ] FQE_20220420181831: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016298401827192577, 'time_algorithm_update': 0.005011686497488938, 'loss': 0.03153290297102057, 'time_step': 0.005251329497428937, 'init_value': -3.865086555480957, 'ave_value': -2.341569829631496, 'soft_opc': nan} step=7434




2022-04-20 18:19.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.13 [info     ] FQE_20220420181831: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.0001645330655372749, 'time_algorithm_update': 0.0050784154126873125, 'loss': 0.03238312345284884, 'time_step': 0.005317506143602275, 'init_value': -4.0731000900268555, 'ave_value': -2.4723609776020767, 'soft_opc': nan} step=7611




2022-04-20 18:19.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.14 [info     ] FQE_20220420181831: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016729036966959635, 'time_algorithm_update': 0.005018888893774, 'loss': 0.0331463019184684, 'time_step': 0.005258211308279953, 'init_value': -4.031643867492676, 'ave_value': -2.3887417350892908, 'soft_opc': nan} step=7788




2022-04-20 18:19.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.15 [info     ] FQE_20220420181831: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016674887662553517, 'time_algorithm_update': 0.005042006066963497, 'loss': 0.033782965353016586, 'time_step': 0.005281181658728648, 'init_value': -4.1434149742126465, 'ave_value': -2.46234727532298, 'soft_opc': nan} step=7965




2022-04-20 18:19.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.16 [info     ] FQE_20220420181831: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.000162644574871171, 'time_algorithm_update': 0.005131097836683025, 'loss': 0.03530772288506166, 'time_step': 0.005367068921105336, 'init_value': -4.2625412940979, 'ave_value': -2.5517143058257776, 'soft_opc': nan} step=8142




2022-04-20 18:19.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.17 [info     ] FQE_20220420181831: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015655883961478194, 'time_algorithm_update': 0.004154726610345356, 'loss': 0.03602620276443576, 'time_step': 0.004383238021936794, 'init_value': -4.3979973793029785, 'ave_value': -2.5645264649176385, 'soft_opc': nan} step=8319




2022-04-20 18:19.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.18 [info     ] FQE_20220420181831: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001635780442232466, 'time_algorithm_update': 0.005073214654868605, 'loss': 0.039302779244532605, 'time_step': 0.0053144888689288985, 'init_value': -4.544051647186279, 'ave_value': -2.6770870804428695, 'soft_opc': nan} step=8496




2022-04-20 18:19.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.19 [info     ] FQE_20220420181831: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00017426647035415562, 'time_algorithm_update': 0.005055504330133988, 'loss': 0.039379189640025763, 'time_step': 0.005300865334979558, 'init_value': -4.647641658782959, 'ave_value': -2.663573336905545, 'soft_opc': nan} step=8673




2022-04-20 18:19.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:19.20 [info     ] FQE_20220420181831: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016998167091843773, 'time_algorithm_update': 0.005047746970828644, 'loss': 0.04011541760084614, 'time_step': 0.005291043028319623, 'init_value': -4.77529764175415, 'ave_value': -2.7368203490346046, 'soft_opc': nan} step=8850




2022-04-20 18:19.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181831/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:19.20 [info     ] Directory is created at d3rlpy_logs/FQE_20220420181920
2022-04-20 18:19.20 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:19.20 [debug    ] Building models...
2022-04-20 18:19.20 [debug    ] Models have been built.
2022-04-20 18:19.20 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420181920/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:19.22 [info     ] FQE_20220420181920: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016330665266010123, 'time_algorithm_update': 0.004957174247419331, 'loss': 0.03282373133574573, 'time_step': 0.005195264413323201, 'init_value': -1.0722910165786743, 'ave_value': -1.1002259663479976, 'soft_opc': nan} step=355




2022-04-20 18:19.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.24 [info     ] FQE_20220420181920: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016548129874215998, 'time_algorithm_update': 0.004982329086518624, 'loss': 0.027884988520871585, 'time_step': 0.005222303094998212, 'init_value': -1.76947820186615, 'ave_value': -1.787193941563713, 'soft_opc': nan} step=710




2022-04-20 18:19.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.26 [info     ] FQE_20220420181920: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016765057201116858, 'time_algorithm_update': 0.004624315046928298, 'loss': 0.03234992069782506, 'time_step': 0.0048668397984034575, 'init_value': -2.020455837249756, 'ave_value': -2.0647562005776465, 'soft_opc': nan} step=1065




2022-04-20 18:19.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.28 [info     ] FQE_20220420181920: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001682637443005199, 'time_algorithm_update': 0.005099301942637269, 'loss': 0.042626627146358224, 'time_step': 0.005345460730539241, 'init_value': -2.289884090423584, 'ave_value': -2.4460766948735584, 'soft_opc': nan} step=1420




2022-04-20 18:19.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.30 [info     ] FQE_20220420181920: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016969224097023548, 'time_algorithm_update': 0.004971856130680568, 'loss': 0.050519508472315865, 'time_step': 0.00521981010974293, 'init_value': -2.412280797958374, 'ave_value': -2.6830288445497916, 'soft_opc': nan} step=1775




2022-04-20 18:19.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.32 [info     ] FQE_20220420181920: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016567874962175396, 'time_algorithm_update': 0.0050089480171740895, 'loss': 0.06649911630059212, 'time_step': 0.005253415712168519, 'init_value': -2.590449094772339, 'ave_value': -3.045315387616344, 'soft_opc': nan} step=2130




2022-04-20 18:19.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.34 [info     ] FQE_20220420181920: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.0001689286299154792, 'time_algorithm_update': 0.004595078213114134, 'loss': 0.07883272376698508, 'time_step': 0.004841241030625894, 'init_value': -2.916865587234497, 'ave_value': -3.471946962252547, 'soft_opc': nan} step=2485




2022-04-20 18:19.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.36 [info     ] FQE_20220420181920: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016897631363129952, 'time_algorithm_update': 0.00506904360274194, 'loss': 0.09554304241819281, 'time_step': 0.005312736941055512, 'init_value': -3.0873968601226807, 'ave_value': -3.835664904028407, 'soft_opc': nan} step=2840




2022-04-20 18:19.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.38 [info     ] FQE_20220420181920: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00016630065273231184, 'time_algorithm_update': 0.004996503910548251, 'loss': 0.11550265249442047, 'time_step': 0.005237422190921407, 'init_value': -3.2522103786468506, 'ave_value': -4.131774531115456, 'soft_opc': nan} step=3195




2022-04-20 18:19.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.40 [info     ] FQE_20220420181920: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00017351902706522337, 'time_algorithm_update': 0.005011960150490344, 'loss': 0.1414895202282449, 'time_step': 0.005262077358407034, 'init_value': -3.2728636264801025, 'ave_value': -4.300214606117903, 'soft_opc': nan} step=3550




2022-04-20 18:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.42 [info     ] FQE_20220420181920: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00017016840652680734, 'time_algorithm_update': 0.0049983991703517, 'loss': 0.16107259041721553, 'time_step': 0.005243050212591467, 'init_value': -3.8617968559265137, 'ave_value': -5.079470784811631, 'soft_opc': nan} step=3905




2022-04-20 18:19.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.44 [info     ] FQE_20220420181920: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00016409578457684585, 'time_algorithm_update': 0.004560263056150624, 'loss': 0.18424855519453404, 'time_step': 0.004796408935331962, 'init_value': -3.5154261589050293, 'ave_value': -4.9291622789897875, 'soft_opc': nan} step=4260




2022-04-20 18:19.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.46 [info     ] FQE_20220420181920: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016697494077010894, 'time_algorithm_update': 0.005078229098252847, 'loss': 0.20798213298786694, 'time_step': 0.005322171600771622, 'init_value': -3.943162202835083, 'ave_value': -5.542088993223075, 'soft_opc': nan} step=4615




2022-04-20 18:19.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.48 [info     ] FQE_20220420181920: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001690172813308071, 'time_algorithm_update': 0.00505697424982635, 'loss': 0.23339909664551978, 'time_step': 0.005302539341886278, 'init_value': -4.036876201629639, 'ave_value': -5.774935849035156, 'soft_opc': nan} step=4970




2022-04-20 18:19.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.50 [info     ] FQE_20220420181920: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016834299329301003, 'time_algorithm_update': 0.005016207359206508, 'loss': 0.25067397892265253, 'time_step': 0.005261386951930087, 'init_value': -4.169731140136719, 'ave_value': -6.141116154208741, 'soft_opc': nan} step=5325




2022-04-20 18:19.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.52 [info     ] FQE_20220420181920: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016836918575662962, 'time_algorithm_update': 0.004572038919153348, 'loss': 0.27213643767254453, 'time_step': 0.004814109667925767, 'init_value': -4.386107921600342, 'ave_value': -6.52138771526084, 'soft_opc': nan} step=5680




2022-04-20 18:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.54 [info     ] FQE_20220420181920: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016764318439322457, 'time_algorithm_update': 0.005068380060330243, 'loss': 0.3072628241399644, 'time_step': 0.005311358142906512, 'init_value': -4.653389930725098, 'ave_value': -6.937298922724015, 'soft_opc': nan} step=6035




2022-04-20 18:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.56 [info     ] FQE_20220420181920: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00017196023967904105, 'time_algorithm_update': 0.005057342287520287, 'loss': 0.32687684701572, 'time_step': 0.005306965868237992, 'init_value': -4.4778900146484375, 'ave_value': -6.892154953501246, 'soft_opc': nan} step=6390




2022-04-20 18:19.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:19.58 [info     ] FQE_20220420181920: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001709266447685134, 'time_algorithm_update': 0.004978374696113694, 'loss': 0.3527912469974286, 'time_step': 0.005226980800360021, 'init_value': -4.84368371963501, 'ave_value': -7.430561145525929, 'soft_opc': nan} step=6745




2022-04-20 18:19.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.00 [info     ] FQE_20220420181920: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.0001711334980709452, 'time_algorithm_update': 0.00501080701048945, 'loss': 0.37341055311262605, 'time_step': 0.005259108207595181, 'init_value': -5.087940216064453, 'ave_value': -7.68065623440606, 'soft_opc': nan} step=7100




2022-04-20 18:20.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.02 [info     ] FQE_20220420181920: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001667600282481019, 'time_algorithm_update': 0.004639872027115083, 'loss': 0.3904654783314802, 'time_step': 0.004883305455597353, 'init_value': -5.195146083831787, 'ave_value': -7.819611927492969, 'soft_opc': nan} step=7455




2022-04-20 18:20.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.04 [info     ] FQE_20220420181920: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016714082637303313, 'time_algorithm_update': 0.005067937474855235, 'loss': 0.4181890856002418, 'time_step': 0.005311342024467361, 'init_value': -5.320952415466309, 'ave_value': -8.010556606155372, 'soft_opc': nan} step=7810




2022-04-20 18:20.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.06 [info     ] FQE_20220420181920: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00016970030018981073, 'time_algorithm_update': 0.004985842234651807, 'loss': 0.4331063384321374, 'time_step': 0.005232677325396471, 'init_value': -5.361494541168213, 'ave_value': -8.032074268939192, 'soft_opc': nan} step=8165




2022-04-20 18:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.08 [info     ] FQE_20220420181920: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017402272828867738, 'time_algorithm_update': 0.005030636720254388, 'loss': 0.4452139643506265, 'time_step': 0.005282332192004567, 'init_value': -5.913308143615723, 'ave_value': -8.370919366093638, 'soft_opc': nan} step=8520




2022-04-20 18:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.09 [info     ] FQE_20220420181920: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016717172004807163, 'time_algorithm_update': 0.0045372418954338824, 'loss': 0.46625613104814373, 'time_step': 0.004781326105896855, 'init_value': -6.360771656036377, 'ave_value': -8.75271083213691, 'soft_opc': nan} step=8875




2022-04-20 18:20.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.11 [info     ] FQE_20220420181920: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016887490178497743, 'time_algorithm_update': 0.005088183577631561, 'loss': 0.48918501657079644, 'time_step': 0.005332766788106569, 'init_value': -6.5751800537109375, 'ave_value': -8.845858215167638, 'soft_opc': nan} step=9230




2022-04-20 18:20.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.13 [info     ] FQE_20220420181920: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017004751823317837, 'time_algorithm_update': 0.005048082244228309, 'loss': 0.49090346782140326, 'time_step': 0.005293104010568538, 'init_value': -6.674399375915527, 'ave_value': -8.810839546196872, 'soft_opc': nan} step=9585




2022-04-20 18:20.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.16 [info     ] FQE_20220420181920: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017091724234567562, 'time_algorithm_update': 0.005142498687959053, 'loss': 0.5111533772882442, 'time_step': 0.005390169251133019, 'init_value': -7.033488750457764, 'ave_value': -9.224213766723755, 'soft_opc': nan} step=9940




2022-04-20 18:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.18 [info     ] FQE_20220420181920: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017145385204906196, 'time_algorithm_update': 0.005112441828553106, 'loss': 0.5199783398966554, 'time_step': 0.005363779336633816, 'init_value': -7.398341655731201, 'ave_value': -9.451857470365258, 'soft_opc': nan} step=10295




2022-04-20 18:20.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.19 [info     ] FQE_20220420181920: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017195016565457196, 'time_algorithm_update': 0.004905244666086116, 'loss': 0.544570275055061, 'time_step': 0.005154917273722904, 'init_value': -7.89943790435791, 'ave_value': -9.7945334980353, 'soft_opc': nan} step=10650




2022-04-20 18:20.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.22 [info     ] FQE_20220420181920: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017443106208049077, 'time_algorithm_update': 0.005060195251249931, 'loss': 0.5545020624837825, 'time_step': 0.0053139478387966965, 'init_value': -8.465304374694824, 'ave_value': -10.301501713754032, 'soft_opc': nan} step=11005




2022-04-20 18:20.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.24 [info     ] FQE_20220420181920: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017481253180705325, 'time_algorithm_update': 0.0051552517313352775, 'loss': 0.5516338442780182, 'time_step': 0.005408900892230826, 'init_value': -8.748043060302734, 'ave_value': -10.389928237219824, 'soft_opc': nan} step=11360




2022-04-20 18:20.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.26 [info     ] FQE_20220420181920: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017108581435512488, 'time_algorithm_update': 0.005223669132716219, 'loss': 0.5717957459704976, 'time_step': 0.005470154990612621, 'init_value': -9.012476921081543, 'ave_value': -10.562828913659931, 'soft_opc': nan} step=11715




2022-04-20 18:20.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.28 [info     ] FQE_20220420181920: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017299853580098756, 'time_algorithm_update': 0.0048304195135412085, 'loss': 0.5875990865830805, 'time_step': 0.005082247290812748, 'init_value': -9.742539405822754, 'ave_value': -10.841006051915837, 'soft_opc': nan} step=12070




2022-04-20 18:20.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.30 [info     ] FQE_20220420181920: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017617319671201035, 'time_algorithm_update': 0.005224695340008803, 'loss': 0.6031707012474956, 'time_step': 0.005478134961195395, 'init_value': -10.378228187561035, 'ave_value': -11.125180764182529, 'soft_opc': nan} step=12425




2022-04-20 18:20.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.32 [info     ] FQE_20220420181920: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00018541510675994444, 'time_algorithm_update': 0.005404516676781883, 'loss': 0.6139674492442692, 'time_step': 0.00566913107751121, 'init_value': -10.847466468811035, 'ave_value': -11.186609827395783, 'soft_opc': nan} step=12780




2022-04-20 18:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.34 [info     ] FQE_20220420181920: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001762866973876953, 'time_algorithm_update': 0.005411604088796696, 'loss': 0.6244708884350011, 'time_step': 0.005667159255121795, 'init_value': -11.466938972473145, 'ave_value': -11.482362873166167, 'soft_opc': nan} step=13135




2022-04-20 18:20.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.36 [info     ] FQE_20220420181920: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00017652578756842814, 'time_algorithm_update': 0.005104964215990523, 'loss': 0.6207074284868341, 'time_step': 0.005360382375582843, 'init_value': -11.921920776367188, 'ave_value': -11.700108442740321, 'soft_opc': nan} step=13490




2022-04-20 18:20.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.38 [info     ] FQE_20220420181920: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017353850351253027, 'time_algorithm_update': 0.005127078042903416, 'loss': 0.6309726581082378, 'time_step': 0.0053801140315096144, 'init_value': -12.382071495056152, 'ave_value': -11.806930161347108, 'soft_opc': nan} step=13845




2022-04-20 18:20.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.40 [info     ] FQE_20220420181920: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00017236723026759187, 'time_algorithm_update': 0.005306844979944363, 'loss': 0.6405804854935744, 'time_step': 0.00556065129562163, 'init_value': -12.985295295715332, 'ave_value': -12.218460479583904, 'soft_opc': nan} step=14200




2022-04-20 18:20.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.42 [info     ] FQE_20220420181920: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017395892613370653, 'time_algorithm_update': 0.005216848346549021, 'loss': 0.6477136124135323, 'time_step': 0.005469585472429302, 'init_value': -13.208261489868164, 'ave_value': -12.06984664948041, 'soft_opc': nan} step=14555




2022-04-20 18:20.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.44 [info     ] FQE_20220420181920: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00017490118322238115, 'time_algorithm_update': 0.005397401057498556, 'loss': 0.6656505404223858, 'time_step': 0.005651334977485764, 'init_value': -13.498208999633789, 'ave_value': -12.263258260944463, 'soft_opc': nan} step=14910




2022-04-20 18:20.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.46 [info     ] FQE_20220420181920: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00017518997192382811, 'time_algorithm_update': 0.004912965398439219, 'loss': 0.6695394146085625, 'time_step': 0.0051679174664994365, 'init_value': -13.925583839416504, 'ave_value': -12.326462403760607, 'soft_opc': nan} step=15265




2022-04-20 18:20.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.49 [info     ] FQE_20220420181920: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017396564215001927, 'time_algorithm_update': 0.005313674496932768, 'loss': 0.6781185180473496, 'time_step': 0.005566503632236534, 'init_value': -14.455689430236816, 'ave_value': -12.58781931072838, 'soft_opc': nan} step=15620




2022-04-20 18:20.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.51 [info     ] FQE_20220420181920: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017314427335497358, 'time_algorithm_update': 0.005196901106498611, 'loss': 0.6803841992163322, 'time_step': 0.005447854458446234, 'init_value': -14.971197128295898, 'ave_value': -12.801981030648845, 'soft_opc': nan} step=15975




2022-04-20 18:20.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.53 [info     ] FQE_20220420181920: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.0001727648184333049, 'time_algorithm_update': 0.005122184081816338, 'loss': 0.6725258701389104, 'time_step': 0.005372748576419454, 'init_value': -14.869376182556152, 'ave_value': -12.463578003185635, 'soft_opc': nan} step=16330




2022-04-20 18:20.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.55 [info     ] FQE_20220420181920: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017187293146697568, 'time_algorithm_update': 0.004726995548731844, 'loss': 0.6688499220240284, 'time_step': 0.004979136292363556, 'init_value': -15.105246543884277, 'ave_value': -12.769611018568641, 'soft_opc': nan} step=16685




2022-04-20 18:20.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.57 [info     ] FQE_20220420181920: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.0001738783339379539, 'time_algorithm_update': 0.005168264684542804, 'loss': 0.6725776604521023, 'time_step': 0.005420371176491321, 'init_value': -15.534332275390625, 'ave_value': -13.156979880181826, 'soft_opc': nan} step=17040




2022-04-20 18:20.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:20.59 [info     ] FQE_20220420181920: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00017853119003940636, 'time_algorithm_update': 0.005115016749207403, 'loss': 0.6859911397571715, 'time_step': 0.005373359062302281, 'init_value': -15.81586742401123, 'ave_value': -13.214160298527563, 'soft_opc': nan} step=17395




2022-04-20 18:20.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:21.01 [info     ] FQE_20220420181920: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017239275112958022, 'time_algorithm_update': 0.005184369020059075, 'loss': 0.6786186187514957, 'time_step': 0.005435002018028582, 'init_value': -16.058244705200195, 'ave_value': -13.253221078549284, 'soft_opc': nan} step=17750




2022-04-20 18:21.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181920/model_17750.pt
search iteration:  34
using hyper params:  [0.005813721273255771, 0.007458549352169142, 1.2851269021978237e-05, 3]
2022-04-20 18:21.01 [debug    ] RoundIterator is selected.
2022-04-20 18:21.01 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420182101
2022-04-20 18:21.01 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:21.01 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:21.01 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:21.01 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0058137212732

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.05 [info     ] TD3PlusBC_20220420182101: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003588896745826766, 'time_algorithm_update': 0.008532837120413084, 'critic_loss': 2.4922119487098784, 'actor_loss': 2.394523651279204, 'time_step': 0.008972048062330101, 'td_error': 0.8318961720293554, 'init_value': -4.349102973937988, 'ave_value': -2.468134421609166} step=342
2022-04-20 18:21.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.08 [info     ] TD3PlusBC_20220420182101: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003559143222563448, 'time_algorithm_update': 0.00905608712581166, 'critic_loss': 1.2564459049213699, 'actor_loss': 2.286400387858787, 'time_step': 0.009486063182005408, 'td_error': 0.8603455854787078, 'init_value': -6.167797565460205, 'ave_value': -3.48248276762507} step=684
2022-04-20 18:21.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.12 [info     ] TD3PlusBC_20220420182101: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003591420357687431, 'time_algorithm_update': 0.008928653092412224, 'critic_loss': 1.7942222728889587, 'actor_loss': 2.280255448748494, 'time_step': 0.009366920816968058, 'td_error': 0.9235227772876092, 'init_value': -8.175915718078613, 'ave_value': -4.644729726896496} step=1026
2022-04-20 18:21.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.16 [info     ] TD3PlusBC_20220420182101: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035807960911800985, 'time_algorithm_update': 0.008700867145382173, 'critic_loss': 2.4691645023767013, 'actor_loss': 2.2820587464940476, 'time_step': 0.009138228600485283, 'td_error': 1.0284957146796891, 'init_value': -10.415285110473633, 'ave_value': -5.858778239417221} step=1368
2022-04-20 18:21.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.19 [info     ] TD3PlusBC_20220420182101: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003546741273668077, 'time_algorithm_update': 0.00891766283247206, 'critic_loss': 3.3077709552837393, 'actor_loss': 2.283136126590751, 'time_step': 0.009350542436566269, 'td_error': 1.1332990691664022, 'init_value': -12.601434707641602, 'ave_value': -7.107322160995163} step=1710
2022-04-20 18:21.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.23 [info     ] TD3PlusBC_20220420182101: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003617290864910996, 'time_algorithm_update': 0.008809379666869403, 'critic_loss': 4.277651706982774, 'actor_loss': 2.2795857630277934, 'time_step': 0.009254466023361474, 'td_error': 1.2657269903268995, 'init_value': -14.720239639282227, 'ave_value': -8.314592944756528} step=2052
2022-04-20 18:21.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.27 [info     ] TD3PlusBC_20220420182101: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035648039209912395, 'time_algorithm_update': 0.00912880339817694, 'critic_loss': 5.287908569762581, 'actor_loss': 2.288384030436912, 'time_step': 0.009562792833785565, 'td_error': 1.4241153027496918, 'init_value': -17.052703857421875, 'ave_value': -9.602208314127214} step=2394
2022-04-20 18:21.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.31 [info     ] TD3PlusBC_20220420182101: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003556103734245077, 'time_algorithm_update': 0.00908337001912078, 'critic_loss': 6.341751310560438, 'actor_loss': 2.2896563672182855, 'time_step': 0.009519011653654756, 'td_error': 1.5862361307773851, 'init_value': -18.870655059814453, 'ave_value': -10.752990436789178} step=2736
2022-04-20 18:21.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.34 [info     ] TD3PlusBC_20220420182101: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003559449959916678, 'time_algorithm_update': 0.007676793817888226, 'critic_loss': 7.367005512031199, 'actor_loss': 2.2881802215910794, 'time_step': 0.008112713607431155, 'td_error': 1.7648499848455481, 'init_value': -20.997337341308594, 'ave_value': -11.85335763794135} step=3078
2022-04-20 18:21.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.37 [info     ] TD3PlusBC_20220420182101: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003530191399200618, 'time_algorithm_update': 0.006975464653550533, 'critic_loss': 8.87796357779475, 'actor_loss': 2.292062847237838, 'time_step': 0.007406874009740283, 'td_error': 1.893787518508499, 'init_value': -23.484235763549805, 'ave_value': -13.131879509890972} step=3420
2022-04-20 18:21.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.40 [info     ] TD3PlusBC_20220420182101: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003552722652056064, 'time_algorithm_update': 0.007026455555742944, 'critic_loss': 10.148448138906245, 'actor_loss': 2.2873627456308108, 'time_step': 0.007458200231630203, 'td_error': 2.016244679849953, 'init_value': -25.986724853515625, 'ave_value': -14.389089749891664} step=3762
2022-04-20 18:21.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.43 [info     ] TD3PlusBC_20220420182101: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003558020842702765, 'time_algorithm_update': 0.00706126954820421, 'critic_loss': 11.71779275637621, 'actor_loss': 2.2940682402828285, 'time_step': 0.00748732424618905, 'td_error': 2.2692035938134096, 'init_value': -27.460216522216797, 'ave_value': -15.355873357109255} step=4104
2022-04-20 18:21.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.46 [info     ] TD3PlusBC_20220420182101: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003513216275220726, 'time_algorithm_update': 0.00700333383348253, 'critic_loss': 13.529177017379226, 'actor_loss': 2.294593723196732, 'time_step': 0.007427794891491271, 'td_error': 2.506265518690869, 'init_value': -29.511520385742188, 'ave_value': -16.52003057240257} step=4446
2022-04-20 18:21.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.49 [info     ] TD3PlusBC_20220420182101: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035886806354188083, 'time_algorithm_update': 0.007096181836044579, 'critic_loss': 15.200769127460948, 'actor_loss': 2.295559469022249, 'time_step': 0.007533003015127796, 'td_error': 2.673874732017046, 'init_value': -31.7025203704834, 'ave_value': -17.62670215381993} step=4788
2022-04-20 18:21.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.52 [info     ] TD3PlusBC_20220420182101: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000351002341822574, 'time_algorithm_update': 0.007041459892228333, 'critic_loss': 17.20441731096011, 'actor_loss': 2.2917770377376625, 'time_step': 0.007464314761914705, 'td_error': 2.868477404087018, 'init_value': -33.18686294555664, 'ave_value': -18.336926270724938} step=5130
2022-04-20 18:21.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.55 [info     ] TD3PlusBC_20220420182101: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003592222057587919, 'time_algorithm_update': 0.006956422538088079, 'critic_loss': 19.133192056800887, 'actor_loss': 2.290767070145635, 'time_step': 0.0073903297123156094, 'td_error': 3.0555997069161043, 'init_value': -34.15301513671875, 'ave_value': -19.39282230514177} step=5472
2022-04-20 18:21.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.58 [info     ] TD3PlusBC_20220420182101: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003548846607319793, 'time_algorithm_update': 0.007158588247689587, 'critic_loss': 21.51855665061906, 'actor_loss': 2.2988202265131545, 'time_step': 0.007592367847063388, 'td_error': 3.2166109315780282, 'init_value': -36.1723518371582, 'ave_value': -20.38544471703688} step=5814
2022-04-20 18:21.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.01 [info     ] TD3PlusBC_20220420182101: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003545890774643212, 'time_algorithm_update': 0.007078727783515439, 'critic_loss': 23.952216954259146, 'actor_loss': 2.293002976311578, 'time_step': 0.0075050571508574904, 'td_error': 3.541376202324255, 'init_value': -38.576515197753906, 'ave_value': -21.507423884769448} step=6156
2022-04-20 18:22.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.04 [info     ] TD3PlusBC_20220420182101: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003592305713229709, 'time_algorithm_update': 0.007016485894632618, 'critic_loss': 26.48905046362626, 'actor_loss': 2.2930313062946697, 'time_step': 0.007447005712498001, 'td_error': 3.8957533725335716, 'init_value': -40.73988723754883, 'ave_value': -22.602715887503347} step=6498
2022-04-20 18:22.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.07 [info     ] TD3PlusBC_20220420182101: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003887937780012164, 'time_algorithm_update': 0.008495911520126968, 'critic_loss': 28.991697411788138, 'actor_loss': 2.2919975330955102, 'time_step': 0.009001571532578497, 'td_error': 3.9837311927080794, 'init_value': -41.398616790771484, 'ave_value': -23.307776227135815} step=6840
2022-04-20 18:22.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.10 [info     ] TD3PlusBC_20220420182101: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035482540465237803, 'time_algorithm_update': 0.006964819473132752, 'critic_loss': 31.637087492914926, 'actor_loss': 2.2933411263583, 'time_step': 0.00739590326944987, 'td_error': 4.225275580589573, 'init_value': -43.5047607421875, 'ave_value': -24.249745178038488} step=7182
2022-04-20 18:22.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.13 [info     ] TD3PlusBC_20220420182101: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003529563981887193, 'time_algorithm_update': 0.007048951952080978, 'critic_loss': 34.26530666239778, 'actor_loss': 2.2990134891710783, 'time_step': 0.007479270299275716, 'td_error': 4.31577009472412, 'init_value': -43.965660095214844, 'ave_value': -24.872913507417532} step=7524
2022-04-20 18:22.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.16 [info     ] TD3PlusBC_20220420182101: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003523143411379809, 'time_algorithm_update': 0.006778172582213642, 'critic_loss': 36.93998243655378, 'actor_loss': 2.293438389984488, 'time_step': 0.007204600244934796, 'td_error': 4.574869173632003, 'init_value': -45.56977462768555, 'ave_value': -25.769253271261547} step=7866
2022-04-20 18:22.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.19 [info     ] TD3PlusBC_20220420182101: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035472362362153347, 'time_algorithm_update': 0.006997735179655734, 'critic_loss': 39.465836432942176, 'actor_loss': 2.2956389739499454, 'time_step': 0.007431733677958885, 'td_error': 4.913097400360052, 'init_value': -47.49355697631836, 'ave_value': -26.713296999641074} step=8208
2022-04-20 18:22.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.22 [info     ] TD3PlusBC_20220420182101: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003562447620414154, 'time_algorithm_update': 0.007128070669564587, 'critic_loss': 42.17788420783149, 'actor_loss': 2.2951956046255013, 'time_step': 0.007561059723123473, 'td_error': 4.969461997236967, 'init_value': -48.613197326660156, 'ave_value': -27.44666086725765} step=8550
2022-04-20 18:22.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.25 [info     ] TD3PlusBC_20220420182101: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003550345437568531, 'time_algorithm_update': 0.007068408162970292, 'critic_loss': 44.596163242183934, 'actor_loss': 2.2967175754190188, 'time_step': 0.007499543546933179, 'td_error': 5.1494536651169, 'init_value': -49.4990234375, 'ave_value': -28.21298562918429} step=8892
2022-04-20 18:22.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.28 [info     ] TD3PlusBC_20220420182101: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035212960159569455, 'time_algorithm_update': 0.007017834144726134, 'critic_loss': 47.122849525763975, 'actor_loss': 2.295190307829115, 'time_step': 0.007443576528314958, 'td_error': 5.432359462105358, 'init_value': -50.844329833984375, 'ave_value': -29.028986237765633} step=9234
2022-04-20 18:22.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.31 [info     ] TD3PlusBC_20220420182101: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003622003466065167, 'time_algorithm_update': 0.007053017616271973, 'critic_loss': 49.647258000067104, 'actor_loss': 2.2989673321707205, 'time_step': 0.007492938934013858, 'td_error': 5.673192448116662, 'init_value': -52.73118209838867, 'ave_value': -29.816126005169053} step=9576
2022-04-20 18:22.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.34 [info     ] TD3PlusBC_20220420182101: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00036137424714384025, 'time_algorithm_update': 0.007120794022989552, 'critic_loss': 52.52868230719315, 'actor_loss': 2.296232283463952, 'time_step': 0.007561156624241879, 'td_error': 5.613281705754677, 'init_value': -52.584556579589844, 'ave_value': -30.31947313977162} step=9918
2022-04-20 18:22.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.37 [info     ] TD3PlusBC_20220420182101: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000359520577547843, 'time_algorithm_update': 0.007155495080334401, 'critic_loss': 54.93730832819353, 'actor_loss': 2.290385309018587, 'time_step': 0.007596854577984726, 'td_error': 5.960335361893611, 'init_value': -54.359619140625, 'ave_value': -31.1753626471356} step=10260
2022-04-20 18:22.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.40 [info     ] TD3PlusBC_20220420182101: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003521498183757938, 'time_algorithm_update': 0.007040980963679085, 'critic_loss': 57.381670332791515, 'actor_loss': 2.2962290013742725, 'time_step': 0.007473102787084747, 'td_error': 6.067669472861553, 'init_value': -54.6989860534668, 'ave_value': -31.68503402309526} step=10602
2022-04-20 18:22.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.43 [info     ] TD3PlusBC_20220420182101: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003551739698265031, 'time_algorithm_update': 0.007060457391348499, 'critic_loss': 59.989685694376625, 'actor_loss': 2.2934742001762167, 'time_step': 0.007494603681285479, 'td_error': 6.304887857723175, 'init_value': -56.109901428222656, 'ave_value': -32.52313146343933} step=10944
2022-04-20 18:22.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.46 [info     ] TD3PlusBC_20220420182101: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00035328962649518284, 'time_algorithm_update': 0.007106661099439476, 'critic_loss': 62.30527443913689, 'actor_loss': 2.2941971592038697, 'time_step': 0.007538161082574498, 'td_error': 6.504917925644811, 'init_value': -58.12696075439453, 'ave_value': -33.28118804335367} step=11286
2022-04-20 18:22.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.49 [info     ] TD3PlusBC_20220420182101: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003565570764374315, 'time_algorithm_update': 0.007037481369330869, 'critic_loss': 64.76092263829638, 'actor_loss': 2.298814815387391, 'time_step': 0.007472550659848933, 'td_error': 6.461651560712722, 'init_value': -57.13848114013672, 'ave_value': -33.57692025009165} step=11628
2022-04-20 18:22.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.52 [info     ] TD3PlusBC_20220420182101: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035826853144238567, 'time_algorithm_update': 0.0071049761353877555, 'critic_loss': 66.99438956188179, 'actor_loss': 2.297002347589236, 'time_step': 0.007544141066701789, 'td_error': 6.5429344974313945, 'init_value': -57.796958923339844, 'ave_value': -34.19935396875389} step=11970
2022-04-20 18:22.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.55 [info     ] TD3PlusBC_20220420182101: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035841562594586643, 'time_algorithm_update': 0.007176490555032652, 'critic_loss': 69.3325430329083, 'actor_loss': 2.2939432015893053, 'time_step': 0.007616176242716828, 'td_error': 6.860427541832517, 'init_value': -59.648231506347656, 'ave_value': -35.010362853341746} step=12312
2022-04-20 18:22.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:22.58 [info     ] TD3PlusBC_20220420182101: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035240217956186037, 'time_algorithm_update': 0.007138749312239083, 'critic_loss': 71.64037147878904, 'actor_loss': 2.2984059233414498, 'time_step': 0.007570869741384049, 'td_error': 6.713335794777817, 'init_value': -58.63532638549805, 'ave_value': -35.28493689541218} step=12654
2022-04-20 18:22.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.01 [info     ] TD3PlusBC_20220420182101: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003575428187498572, 'time_algorithm_update': 0.00710130086419178, 'critic_loss': 73.85938143590738, 'actor_loss': 2.2982315333963137, 'time_step': 0.007543633555808262, 'td_error': 7.133466076988096, 'init_value': -61.24065399169922, 'ave_value': -36.13913154822061} step=12996
2022-04-20 18:23.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.04 [info     ] TD3PlusBC_20220420182101: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003534206870006539, 'time_algorithm_update': 0.007085563146580032, 'critic_loss': 76.0024277982656, 'actor_loss': 2.2965990618655554, 'time_step': 0.007519946460835418, 'td_error': 7.017346961972065, 'init_value': -60.156471252441406, 'ave_value': -36.369904429829674} step=13338
2022-04-20 18:23.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.07 [info     ] TD3PlusBC_20220420182101: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00035748356267025595, 'time_algorithm_update': 0.007021480136447483, 'critic_loss': 78.23981251632958, 'actor_loss': 2.2992704277150113, 'time_step': 0.0074564804110610694, 'td_error': 7.348011859572467, 'init_value': -61.76446533203125, 'ave_value': -36.93403550266775} step=13680
2022-04-20 18:23.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.10 [info     ] TD3PlusBC_20220420182101: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035281209220663147, 'time_algorithm_update': 0.007066761541087725, 'critic_loss': 80.01924695466694, 'actor_loss': 2.300248624288548, 'time_step': 0.007502003719932155, 'td_error': 7.325168825574773, 'init_value': -61.597877502441406, 'ave_value': -37.48517232880701} step=14022
2022-04-20 18:23.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.13 [info     ] TD3PlusBC_20220420182101: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003531188295598616, 'time_algorithm_update': 0.007155120024207043, 'critic_loss': 82.02447278876053, 'actor_loss': 2.296015775691696, 'time_step': 0.007584843022084376, 'td_error': 7.61890834707713, 'init_value': -63.57080078125, 'ave_value': -38.173646791261866} step=14364
2022-04-20 18:23.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.16 [info     ] TD3PlusBC_20220420182101: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003603627110085292, 'time_algorithm_update': 0.007066801277517575, 'critic_loss': 83.9874905368738, 'actor_loss': 2.3011191309544077, 'time_step': 0.007510689266941003, 'td_error': 7.386421547439322, 'init_value': -62.57997512817383, 'ave_value': -38.43088551373531} step=14706
2022-04-20 18:23.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.19 [info     ] TD3PlusBC_20220420182101: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003582176409269634, 'time_algorithm_update': 0.007037364251432363, 'critic_loss': 85.86777631302326, 'actor_loss': 2.298816184551395, 'time_step': 0.0074753057189852175, 'td_error': 7.701985970402002, 'init_value': -63.51141357421875, 'ave_value': -38.962207720223226} step=15048
2022-04-20 18:23.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.22 [info     ] TD3PlusBC_20220420182101: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003558836485210218, 'time_algorithm_update': 0.0071947428218105385, 'critic_loss': 87.77680063526533, 'actor_loss': 2.3007046869623733, 'time_step': 0.007631260052061917, 'td_error': 7.940794270105112, 'init_value': -65.65695190429688, 'ave_value': -39.61599459129017} step=15390
2022-04-20 18:23.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.25 [info     ] TD3PlusBC_20220420182101: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035791578348617106, 'time_algorithm_update': 0.007155406544780174, 'critic_loss': 89.56191768980862, 'actor_loss': 2.2997137105952925, 'time_step': 0.007592730354844478, 'td_error': 8.230121268621156, 'init_value': -65.4051284790039, 'ave_value': -40.02283403084118} step=15732
2022-04-20 18:23.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.28 [info     ] TD3PlusBC_20220420182101: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035961747866624977, 'time_algorithm_update': 0.007164403011924342, 'critic_loss': 91.60623496875428, 'actor_loss': 2.304599819127579, 'time_step': 0.00760566979123835, 'td_error': 8.429509795049523, 'init_value': -66.00373840332031, 'ave_value': -40.389557196635685} step=16074
2022-04-20 18:23.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.31 [info     ] TD3PlusBC_20220420182101: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003555671513429162, 'time_algorithm_update': 0.007072038817823979, 'critic_loss': 93.21060450592934, 'actor_loss': 2.3032484668040136, 'time_step': 0.00750533251734505, 'td_error': 8.539268580417648, 'init_value': -66.45716094970703, 'ave_value': -40.87687920812987} step=16416
2022-04-20 18:23.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.34 [info     ] TD3PlusBC_20220420182101: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003534750631678174, 'time_algorithm_update': 0.007125014450117859, 'critic_loss': 94.90547841613056, 'actor_loss': 2.305763067557798, 'time_step': 0.007557474381742421, 'td_error': 8.481763898871282, 'init_value': -65.38212585449219, 'ave_value': -40.95858997941495} step=16758
2022-04-20 18:23.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.37 [info     ] TD3PlusBC_20220420182101: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00036007549330505014, 'time_algorithm_update': 0.0070909477813899165, 'critic_loss': 96.4079315341704, 'actor_loss': 2.3030184123948305, 'time_step': 0.007530270264162655, 'td_error': 8.6188310381113, 'init_value': -67.22823333740234, 'ave_value': -41.59826601470829} step=17100
2022-04-20 18:23.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182101/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:23.38 [info     ] FQE_20220420182337: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001680405743150826, 'time_algorithm_update': 0.0036609158458479917, 'loss': 0.007738677558982857, 'time_step': 0.0039037178797894216, 'init_value': -0.23840196430683136, 'ave_value': -0.1746037125721708, 'soft_opc': nan} step=166




2022-04-20 18:23.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.38 [info     ] FQE_20220420182337: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001690761152520237, 'time_algorithm_update': 0.003591623650975974, 'loss': 0.005407242620565805, 'time_step': 0.003835369305438306, 'init_value': -0.3498222231864929, 'ave_value': -0.2303500369627465, 'soft_opc': nan} step=332




2022-04-20 18:23.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.39 [info     ] FQE_20220420182337: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001735198928649167, 'time_algorithm_update': 0.0037973429783281073, 'loss': 0.00498252121047173, 'time_step': 0.004048301512936512, 'init_value': -0.3726953864097595, 'ave_value': -0.22988743684170684, 'soft_opc': nan} step=498




2022-04-20 18:23.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.40 [info     ] FQE_20220420182337: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017352563789091915, 'time_algorithm_update': 0.0037739578499851457, 'loss': 0.004978574381815831, 'time_step': 0.004017368856682835, 'init_value': -0.4514193534851074, 'ave_value': -0.2808527829662502, 'soft_opc': nan} step=664




2022-04-20 18:23.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.41 [info     ] FQE_20220420182337: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00017660209931522967, 'time_algorithm_update': 0.0053723878171070515, 'loss': 0.004566998703472586, 'time_step': 0.005627617778548275, 'init_value': -0.44392818212509155, 'ave_value': -0.270686222846947, 'soft_opc': nan} step=830




2022-04-20 18:23.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.42 [info     ] FQE_20220420182337: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00017472778458193125, 'time_algorithm_update': 0.005175876330180341, 'loss': 0.004195370150348508, 'time_step': 0.005425171679761036, 'init_value': -0.4705903232097626, 'ave_value': -0.2931252325792705, 'soft_opc': nan} step=996




2022-04-20 18:23.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.43 [info     ] FQE_20220420182337: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001744936747723315, 'time_algorithm_update': 0.005271588463381112, 'loss': 0.004243424893020237, 'time_step': 0.005519489207899714, 'init_value': -0.48462367057800293, 'ave_value': -0.29536617487199135, 'soft_opc': nan} step=1162




2022-04-20 18:23.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.44 [info     ] FQE_20220420182337: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001818315092339573, 'time_algorithm_update': 0.005307810852326542, 'loss': 0.004006093618297298, 'time_step': 0.005566278135920146, 'init_value': -0.5722657442092896, 'ave_value': -0.3680416524880104, 'soft_opc': nan} step=1328




2022-04-20 18:23.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.45 [info     ] FQE_20220420182337: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00017882255186517555, 'time_algorithm_update': 0.005225804914911109, 'loss': 0.004320660293949431, 'time_step': 0.005480858216802758, 'init_value': -0.5591197609901428, 'ave_value': -0.3475231435686887, 'soft_opc': nan} step=1494




2022-04-20 18:23.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.46 [info     ] FQE_20220420182337: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00017522903810064476, 'time_algorithm_update': 0.005180456552160792, 'loss': 0.004353170073450913, 'time_step': 0.005434268928435911, 'init_value': -0.6062970161437988, 'ave_value': -0.38016019484850466, 'soft_opc': nan} step=1660




2022-04-20 18:23.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.47 [info     ] FQE_20220420182337: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00017716080309396767, 'time_algorithm_update': 0.005247904593686023, 'loss': 0.004308352168335254, 'time_step': 0.0054989636662494705, 'init_value': -0.6545048952102661, 'ave_value': -0.4111652570222882, 'soft_opc': nan} step=1826




2022-04-20 18:23.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.48 [info     ] FQE_20220420182337: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001747364021209349, 'time_algorithm_update': 0.00516188288309488, 'loss': 0.004401780688764909, 'time_step': 0.005411112164876547, 'init_value': -0.6830488443374634, 'ave_value': -0.41664228921353413, 'soft_opc': nan} step=1992




2022-04-20 18:23.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.49 [info     ] FQE_20220420182337: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00017740353044257107, 'time_algorithm_update': 0.004800879811666098, 'loss': 0.005173423694283982, 'time_step': 0.005057087863784239, 'init_value': -0.7474908828735352, 'ave_value': -0.45859781805811833, 'soft_opc': nan} step=2158




2022-04-20 18:23.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.50 [info     ] FQE_20220420182337: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001769209482583655, 'time_algorithm_update': 0.005076113953647843, 'loss': 0.00583115181428414, 'time_step': 0.005331846604864281, 'init_value': -0.8493474721908569, 'ave_value': -0.5193361627890284, 'soft_opc': nan} step=2324




2022-04-20 18:23.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.51 [info     ] FQE_20220420182337: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017866169113710703, 'time_algorithm_update': 0.00527853132730507, 'loss': 0.006276460551005979, 'time_step': 0.005533902041883354, 'init_value': -0.8974440097808838, 'ave_value': -0.5528574800068462, 'soft_opc': nan} step=2490




2022-04-20 18:23.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.52 [info     ] FQE_20220420182337: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00017426674624523484, 'time_algorithm_update': 0.005222642278096762, 'loss': 0.006711652700884649, 'time_step': 0.0054788144238023875, 'init_value': -0.8890131711959839, 'ave_value': -0.528569844638224, 'soft_opc': nan} step=2656




2022-04-20 18:23.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.53 [info     ] FQE_20220420182337: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00017419924218970608, 'time_algorithm_update': 0.005207939320299999, 'loss': 0.00724081135234313, 'time_step': 0.005458807370748864, 'init_value': -0.9360363483428955, 'ave_value': -0.5452968659789746, 'soft_opc': nan} step=2822




2022-04-20 18:23.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.54 [info     ] FQE_20220420182337: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00018205700150455338, 'time_algorithm_update': 0.005208170557596597, 'loss': 0.007721993835861738, 'time_step': 0.005465391170547669, 'init_value': -1.0105282068252563, 'ave_value': -0.5947984429487438, 'soft_opc': nan} step=2988




2022-04-20 18:23.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.55 [info     ] FQE_20220420182337: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00017469187816941594, 'time_algorithm_update': 0.0052148362240159365, 'loss': 0.008318620423943433, 'time_step': 0.005463223859488246, 'init_value': -1.0569639205932617, 'ave_value': -0.6161808099501204, 'soft_opc': nan} step=3154




2022-04-20 18:23.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.56 [info     ] FQE_20220420182337: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00018256112753626812, 'time_algorithm_update': 0.005230044744100915, 'loss': 0.009219286852538675, 'time_step': 0.005490903394768037, 'init_value': -1.1007275581359863, 'ave_value': -0.6225037013574898, 'soft_opc': nan} step=3320




2022-04-20 18:23.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.57 [info     ] FQE_20220420182337: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001786128584160862, 'time_algorithm_update': 0.00513716634497585, 'loss': 0.009961916743188602, 'time_step': 0.0053941643381693275, 'init_value': -1.1631286144256592, 'ave_value': -0.6642363315519483, 'soft_opc': nan} step=3486




2022-04-20 18:23.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.58 [info     ] FQE_20220420182337: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000174576977649367, 'time_algorithm_update': 0.00508427619934082, 'loss': 0.010181171813957304, 'time_step': 0.005334069929927228, 'init_value': -1.25853431224823, 'ave_value': -0.7315111046271012, 'soft_opc': nan} step=3652




2022-04-20 18:23.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:23.59 [info     ] FQE_20220420182337: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017772956066820994, 'time_algorithm_update': 0.004766020430139749, 'loss': 0.01179903207405706, 'time_step': 0.005025179989366646, 'init_value': -1.4307079315185547, 'ave_value': -0.8361733933335992, 'soft_opc': nan} step=3818




2022-04-20 18:23.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.00 [info     ] FQE_20220420182337: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00018186023436396955, 'time_algorithm_update': 0.005216778042804764, 'loss': 0.01222102692019733, 'time_step': 0.005476718925567995, 'init_value': -1.383921504020691, 'ave_value': -0.7993562912392254, 'soft_opc': nan} step=3984




2022-04-20 18:24.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.01 [info     ] FQE_20220420182337: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001759227499904403, 'time_algorithm_update': 0.005283005266304475, 'loss': 0.013509730619086648, 'time_step': 0.005535151585038886, 'init_value': -1.5541843175888062, 'ave_value': -0.9066956420110219, 'soft_opc': nan} step=4150




2022-04-20 18:24.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.02 [info     ] FQE_20220420182337: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017678594014730798, 'time_algorithm_update': 0.005211759762591626, 'loss': 0.01458891423584624, 'time_step': 0.005468112876616329, 'init_value': -1.60837984085083, 'ave_value': -0.9361654352556142, 'soft_opc': nan} step=4316




2022-04-20 18:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.03 [info     ] FQE_20220420182337: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.000177178038171975, 'time_algorithm_update': 0.005229098251067012, 'loss': 0.015579963194806668, 'time_step': 0.005488048116844821, 'init_value': -1.734857439994812, 'ave_value': -1.0300722589036702, 'soft_opc': nan} step=4482




2022-04-20 18:24.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.04 [info     ] FQE_20220420182337: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017782435359725034, 'time_algorithm_update': 0.005325218281113958, 'loss': 0.01663805941310543, 'time_step': 0.005588288766792022, 'init_value': -1.7800219058990479, 'ave_value': -1.0415890611724945, 'soft_opc': nan} step=4648




2022-04-20 18:24.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.05 [info     ] FQE_20220420182337: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001781101686408721, 'time_algorithm_update': 0.005304292023900044, 'loss': 0.017561511503369827, 'time_step': 0.005560942443020372, 'init_value': -1.8623524904251099, 'ave_value': -1.0813576150517743, 'soft_opc': nan} step=4814




2022-04-20 18:24.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.06 [info     ] FQE_20220420182337: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001774825245501047, 'time_algorithm_update': 0.005205417253884925, 'loss': 0.01823546350035951, 'time_step': 0.005457642566726868, 'init_value': -1.9685866832733154, 'ave_value': -1.1567471494126293, 'soft_opc': nan} step=4980




2022-04-20 18:24.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.07 [info     ] FQE_20220420182337: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017438021050878317, 'time_algorithm_update': 0.005178181521863823, 'loss': 0.01937541418992185, 'time_step': 0.005429764828049993, 'init_value': -2.0314865112304688, 'ave_value': -1.1729029313250035, 'soft_opc': nan} step=5146




2022-04-20 18:24.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.07 [info     ] FQE_20220420182337: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001751945679446301, 'time_algorithm_update': 0.004345577883433147, 'loss': 0.0203481730514105, 'time_step': 0.004597342157938394, 'init_value': -2.0894808769226074, 'ave_value': -1.2063756195626951, 'soft_opc': nan} step=5312




2022-04-20 18:24.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.08 [info     ] FQE_20220420182337: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.000175694385206843, 'time_algorithm_update': 0.005229310817029102, 'loss': 0.020498517672082477, 'time_step': 0.005480918539575784, 'init_value': -2.104952812194824, 'ave_value': -1.2136451400774488, 'soft_opc': nan} step=5478




2022-04-20 18:24.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.09 [info     ] FQE_20220420182337: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00017919741481183524, 'time_algorithm_update': 0.005293478448706937, 'loss': 0.02229515904159543, 'time_step': 0.0055512951081057626, 'init_value': -2.2614173889160156, 'ave_value': -1.3132173290341957, 'soft_opc': nan} step=5644




2022-04-20 18:24.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.10 [info     ] FQE_20220420182337: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017271415296807346, 'time_algorithm_update': 0.005145516740270408, 'loss': 0.022453978106502938, 'time_step': 0.0053970957376870765, 'init_value': -2.325951337814331, 'ave_value': -1.3420841334815565, 'soft_opc': nan} step=5810




2022-04-20 18:24.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.11 [info     ] FQE_20220420182337: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017489439033600222, 'time_algorithm_update': 0.00522470330617514, 'loss': 0.023463105466581476, 'time_step': 0.005477030593228628, 'init_value': -2.4249930381774902, 'ave_value': -1.4353337322999429, 'soft_opc': nan} step=5976




2022-04-20 18:24.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.12 [info     ] FQE_20220420182337: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00017709473529493953, 'time_algorithm_update': 0.005232584045593997, 'loss': 0.024940295066370303, 'time_step': 0.00548778959067471, 'init_value': -2.5434961318969727, 'ave_value': -1.5094344994443394, 'soft_opc': nan} step=6142




2022-04-20 18:24.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.13 [info     ] FQE_20220420182337: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00018016976046274943, 'time_algorithm_update': 0.005116382277155497, 'loss': 0.026334044294213002, 'time_step': 0.005374071109725769, 'init_value': -2.623178005218506, 'ave_value': -1.521082884576675, 'soft_opc': nan} step=6308




2022-04-20 18:24.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.14 [info     ] FQE_20220420182337: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001763134117586067, 'time_algorithm_update': 0.005157778062016131, 'loss': 0.0272772213477776, 'time_step': 0.005413292402244476, 'init_value': -2.708127975463867, 'ave_value': -1.5994816013888733, 'soft_opc': nan} step=6474




2022-04-20 18:24.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.15 [info     ] FQE_20220420182337: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001732527491558029, 'time_algorithm_update': 0.005250883389668292, 'loss': 0.02797630725846435, 'time_step': 0.0055013780134269994, 'init_value': -2.7561545372009277, 'ave_value': -1.6179850150252113, 'soft_opc': nan} step=6640




2022-04-20 18:24.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.16 [info     ] FQE_20220420182337: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00017575183546686746, 'time_algorithm_update': 0.004429990986743605, 'loss': 0.02877555236343788, 'time_step': 0.004680370709982263, 'init_value': -2.754098892211914, 'ave_value': -1.6254308959473394, 'soft_opc': nan} step=6806




2022-04-20 18:24.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.17 [info     ] FQE_20220420182337: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001759371125554464, 'time_algorithm_update': 0.0052886554993778825, 'loss': 0.030298090420652134, 'time_step': 0.005538844200501959, 'init_value': -2.825549364089966, 'ave_value': -1.6419089880125883, 'soft_opc': nan} step=6972




2022-04-20 18:24.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.18 [info     ] FQE_20220420182337: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001823399440351739, 'time_algorithm_update': 0.005264802151415722, 'loss': 0.03195566214980131, 'time_step': 0.005524964217680046, 'init_value': -2.8592638969421387, 'ave_value': -1.6495417464692312, 'soft_opc': nan} step=7138




2022-04-20 18:24.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.19 [info     ] FQE_20220420182337: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00017815325633589043, 'time_algorithm_update': 0.005327636937060988, 'loss': 0.03249574978330947, 'time_step': 0.0055840159037027015, 'init_value': -3.0229086875915527, 'ave_value': -1.8340016611033698, 'soft_opc': nan} step=7304




2022-04-20 18:24.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.20 [info     ] FQE_20220420182337: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00017480246991996305, 'time_algorithm_update': 0.005198643868228039, 'loss': 0.032679886324331164, 'time_step': 0.0054524189018341435, 'init_value': -2.9905426502227783, 'ave_value': -1.7814839179208082, 'soft_opc': nan} step=7470




2022-04-20 18:24.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.21 [info     ] FQE_20220420182337: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00017643980233066054, 'time_algorithm_update': 0.005263710596475257, 'loss': 0.03402827225950917, 'time_step': 0.005516287792159851, 'init_value': -2.9886741638183594, 'ave_value': -1.7269924704170039, 'soft_opc': nan} step=7636




2022-04-20 18:24.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.22 [info     ] FQE_20220420182337: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001771636756069689, 'time_algorithm_update': 0.005172496818634401, 'loss': 0.03529823310561029, 'time_step': 0.005427415112414992, 'init_value': -3.1537132263183594, 'ave_value': -1.8722757507950436, 'soft_opc': nan} step=7802




2022-04-20 18:24.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.23 [info     ] FQE_20220420182337: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00017719240073698112, 'time_algorithm_update': 0.00518904536603445, 'loss': 0.037169872305541106, 'time_step': 0.00544435863035271, 'init_value': -3.2699966430664062, 'ave_value': -1.9516350366607276, 'soft_opc': nan} step=7968




2022-04-20 18:24.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.24 [info     ] FQE_20220420182337: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017719527324998235, 'time_algorithm_update': 0.005267319909061294, 'loss': 0.03784774378480681, 'time_step': 0.005521260112164968, 'init_value': -3.330658435821533, 'ave_value': -1.9674526578050342, 'soft_opc': nan} step=8134




2022-04-20 18:24.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:24.25 [info     ] FQE_20220420182337: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001755378332482763, 'time_algorithm_update': 0.0048327718872621835, 'loss': 0.04030659770682813, 'time_step': 0.005084661116082984, 'init_value': -3.3752098083496094, 'ave_value': -2.0039442577986635, 'soft_opc': nan} step=8300




2022-04-20 18:24.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182337/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:24.26 [info     ] Directory is created at d3rlpy_logs/FQE_20220420182426
2022-04-20 18:24.26 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:24.26 [debug    ] Building models...
2022-04-20 18:24.26 [debug    ] Models have been built.
2022-04-20 18:24.26 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420182426/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:24.28 [info     ] FQE_20220420182426: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00017829412637754928, 'time_algorithm_update': 0.005259757125100424, 'loss': 0.028499576292910375, 'time_step': 0.0055169102757476095, 'init_value': -1.156045913696289, 'ave_value': -1.1179846318858164, 'soft_opc': nan} step=344




2022-04-20 18:24.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.30 [info     ] FQE_20220420182426: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001783433348633522, 'time_algorithm_update': 0.005226540704106175, 'loss': 0.023914976245831956, 'time_step': 0.005486219428306402, 'init_value': -1.9040565490722656, 'ave_value': -1.8376600473213034, 'soft_opc': nan} step=688




2022-04-20 18:24.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.32 [info     ] FQE_20220420182426: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017473656077717626, 'time_algorithm_update': 0.005124261905980664, 'loss': 0.02637867050429503, 'time_step': 0.00537723510764366, 'init_value': -2.8625519275665283, 'ave_value': -2.7294246150113577, 'soft_opc': nan} step=1032




2022-04-20 18:24.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.34 [info     ] FQE_20220420182426: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.000181090000063874, 'time_algorithm_update': 0.005135394806085631, 'loss': 0.030161749049046532, 'time_step': 0.005396438199420308, 'init_value': -3.5817360877990723, 'ave_value': -3.4115628360586054, 'soft_opc': nan} step=1376




2022-04-20 18:24.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.36 [info     ] FQE_20220420182426: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017436368520869764, 'time_algorithm_update': 0.004824546187422996, 'loss': 0.03963880632397567, 'time_step': 0.0050796048585758655, 'init_value': -4.447214603424072, 'ave_value': -4.254008567573184, 'soft_opc': nan} step=1720




2022-04-20 18:24.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.38 [info     ] FQE_20220420182426: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00018101237541021301, 'time_algorithm_update': 0.0052227072937544, 'loss': 0.05136300941799269, 'time_step': 0.005483589200086372, 'init_value': -5.157138824462891, 'ave_value': -4.905350313500107, 'soft_opc': nan} step=2064




2022-04-20 18:24.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.40 [info     ] FQE_20220420182426: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017956315084945325, 'time_algorithm_update': 0.00523478485817133, 'loss': 0.06610163182648289, 'time_step': 0.00549395902212276, 'init_value': -5.963363170623779, 'ave_value': -5.678078507637052, 'soft_opc': nan} step=2408




2022-04-20 18:24.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.42 [info     ] FQE_20220420182426: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00018021672271018805, 'time_algorithm_update': 0.005151423603989357, 'loss': 0.08545757615722196, 'time_step': 0.005412663831267246, 'init_value': -6.8154520988464355, 'ave_value': -6.4780906811356544, 'soft_opc': nan} step=2752




2022-04-20 18:24.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.44 [info     ] FQE_20220420182426: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017380229262418525, 'time_algorithm_update': 0.004887592653895534, 'loss': 0.10354772523901057, 'time_step': 0.005139440298080444, 'init_value': -7.6155548095703125, 'ave_value': -7.2702804073834, 'soft_opc': nan} step=3096




2022-04-20 18:24.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.46 [info     ] FQE_20220420182426: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017800026161726132, 'time_algorithm_update': 0.005208376535149508, 'loss': 0.13131848105424365, 'time_step': 0.0054622327172478965, 'init_value': -8.402254104614258, 'ave_value': -8.021133695966522, 'soft_opc': nan} step=3440




2022-04-20 18:24.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.48 [info     ] FQE_20220420182426: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00018430795780448027, 'time_algorithm_update': 0.005242755246716876, 'loss': 0.1587002485622327, 'time_step': 0.00551001554311708, 'init_value': -8.95685863494873, 'ave_value': -8.491459180681069, 'soft_opc': nan} step=3784




2022-04-20 18:24.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.50 [info     ] FQE_20220420182426: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00018180525580117869, 'time_algorithm_update': 0.0051993734614793645, 'loss': 0.1900106731119977, 'time_step': 0.005461542412292126, 'init_value': -9.869176864624023, 'ave_value': -9.345625067583645, 'soft_opc': nan} step=4128




2022-04-20 18:24.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.52 [info     ] FQE_20220420182426: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017682133718978528, 'time_algorithm_update': 0.005215280970861745, 'loss': 0.22450361434413596, 'time_step': 0.005469319432280784, 'init_value': -10.562519073486328, 'ave_value': -9.957749643017799, 'soft_opc': nan} step=4472




2022-04-20 18:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.54 [info     ] FQE_20220420182426: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00018175396808358125, 'time_algorithm_update': 0.004730402730232061, 'loss': 0.26232320173686846, 'time_step': 0.004989852738934894, 'init_value': -11.275392532348633, 'ave_value': -10.574321617493155, 'soft_opc': nan} step=4816




2022-04-20 18:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.56 [info     ] FQE_20220420182426: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00018298833869224372, 'time_algorithm_update': 0.0052348590174386665, 'loss': 0.2963158004852228, 'time_step': 0.0054963806340860765, 'init_value': -11.840940475463867, 'ave_value': -11.109864420060337, 'soft_opc': nan} step=5160




2022-04-20 18:24.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:24.58 [info     ] FQE_20220420182426: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00018348250278206758, 'time_algorithm_update': 0.005230505106061004, 'loss': 0.3270960182633771, 'time_step': 0.00549473388250484, 'init_value': -12.491466522216797, 'ave_value': -11.68551259612631, 'soft_opc': nan} step=5504




2022-04-20 18:24.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.00 [info     ] FQE_20220420182426: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017964216165764388, 'time_algorithm_update': 0.0051383708798608116, 'loss': 0.36842112980653036, 'time_step': 0.005398515351983004, 'init_value': -12.836661338806152, 'ave_value': -12.086420575929553, 'soft_opc': nan} step=5848




2022-04-20 18:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.02 [info     ] FQE_20220420182426: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00018371676289758018, 'time_algorithm_update': 0.005097884078358495, 'loss': 0.3971560626442349, 'time_step': 0.005359741144402083, 'init_value': -13.379106521606445, 'ave_value': -12.65726422778423, 'soft_opc': nan} step=6192




2022-04-20 18:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.04 [info     ] FQE_20220420182426: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017698421034702036, 'time_algorithm_update': 0.004977158335752265, 'loss': 0.4385907188826782, 'time_step': 0.005234585944996323, 'init_value': -14.058390617370605, 'ave_value': -13.334506727001202, 'soft_opc': nan} step=6536




2022-04-20 18:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.06 [info     ] FQE_20220420182426: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00018332309501115665, 'time_algorithm_update': 0.005219670229179915, 'loss': 0.47455153887181784, 'time_step': 0.0054834713769513505, 'init_value': -14.597808837890625, 'ave_value': -13.920643788631502, 'soft_opc': nan} step=6880




2022-04-20 18:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.08 [info     ] FQE_20220420182426: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00018199307973994764, 'time_algorithm_update': 0.005194877469262412, 'loss': 0.5082233883061468, 'time_step': 0.005459379318148591, 'init_value': -15.10611343383789, 'ave_value': -14.391018827883778, 'soft_opc': nan} step=7224




2022-04-20 18:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.10 [info     ] FQE_20220420182426: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00018371191135672636, 'time_algorithm_update': 0.005117133606311886, 'loss': 0.5342578925084063, 'time_step': 0.005380408015362052, 'init_value': -15.414274215698242, 'ave_value': -14.791777551519424, 'soft_opc': nan} step=7568




2022-04-20 18:25.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.12 [info     ] FQE_20220420182426: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001803421696951223, 'time_algorithm_update': 0.004723054725070333, 'loss': 0.567296851718755, 'time_step': 0.004980053319487461, 'init_value': -15.765189170837402, 'ave_value': -15.078169097452685, 'soft_opc': nan} step=7912




2022-04-20 18:25.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.14 [info     ] FQE_20220420182426: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00018125911091649256, 'time_algorithm_update': 0.00522530009580213, 'loss': 0.5852734639071101, 'time_step': 0.0054857730865478516, 'init_value': -16.33220100402832, 'ave_value': -15.860612732853305, 'soft_opc': nan} step=8256




2022-04-20 18:25.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.16 [info     ] FQE_20220420182426: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00018132703248844591, 'time_algorithm_update': 0.005226192086241966, 'loss': 0.607685397384515, 'time_step': 0.005485041196956191, 'init_value': -16.809146881103516, 'ave_value': -16.369334038385183, 'soft_opc': nan} step=8600




2022-04-20 18:25.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.18 [info     ] FQE_20220420182426: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001868259075076081, 'time_algorithm_update': 0.005207228106121684, 'loss': 0.6271160769189686, 'time_step': 0.005475042171256487, 'init_value': -17.201250076293945, 'ave_value': -16.843271010070367, 'soft_opc': nan} step=8944




2022-04-20 18:25.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.20 [info     ] FQE_20220420182426: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001764768777891647, 'time_algorithm_update': 0.005281498958898145, 'loss': 0.6179208926488321, 'time_step': 0.00553628871607226, 'init_value': -16.974023818969727, 'ave_value': -16.85652643517289, 'soft_opc': nan} step=9288




2022-04-20 18:25.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.22 [info     ] FQE_20220420182426: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00018137624097424885, 'time_algorithm_update': 0.004903312339339145, 'loss': 0.6242792299125604, 'time_step': 0.005163682754649673, 'init_value': -17.428030014038086, 'ave_value': -17.437765139921964, 'soft_opc': nan} step=9632




2022-04-20 18:25.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.24 [info     ] FQE_20220420182426: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017760867296263229, 'time_algorithm_update': 0.005196407090785892, 'loss': 0.6431758207767162, 'time_step': 0.005452946868053702, 'init_value': -17.755596160888672, 'ave_value': -18.00664327354674, 'soft_opc': nan} step=9976




2022-04-20 18:25.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.26 [info     ] FQE_20220420182426: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00018087445303451184, 'time_algorithm_update': 0.005135344211445298, 'loss': 0.6468918725285072, 'time_step': 0.005394680555476699, 'init_value': -18.037105560302734, 'ave_value': -18.249255712484302, 'soft_opc': nan} step=10320




2022-04-20 18:25.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.28 [info     ] FQE_20220420182426: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00018030543660008631, 'time_algorithm_update': 0.005160690740097401, 'loss': 0.6483295697144904, 'time_step': 0.005422392556833667, 'init_value': -18.442440032958984, 'ave_value': -18.92414300483987, 'soft_opc': nan} step=10664




2022-04-20 18:25.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.30 [info     ] FQE_20220420182426: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017687816952550135, 'time_algorithm_update': 0.004733626232590786, 'loss': 0.6490808374703277, 'time_step': 0.0049874567708303764, 'init_value': -18.176368713378906, 'ave_value': -18.73492164358849, 'soft_opc': nan} step=11008




2022-04-20 18:25.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.32 [info     ] FQE_20220420182426: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017876264660857444, 'time_algorithm_update': 0.005187791447306789, 'loss': 0.6530836173387374, 'time_step': 0.005445374998935434, 'init_value': -18.448211669921875, 'ave_value': -19.082239049149525, 'soft_opc': nan} step=11352




2022-04-20 18:25.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.34 [info     ] FQE_20220420182426: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017935453459273938, 'time_algorithm_update': 0.005179166100745977, 'loss': 0.6360989514370124, 'time_step': 0.005438610564830692, 'init_value': -18.65146827697754, 'ave_value': -19.384206802734, 'soft_opc': nan} step=11696




2022-04-20 18:25.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.36 [info     ] FQE_20220420182426: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017703688421914744, 'time_algorithm_update': 0.005086835733679838, 'loss': 0.6306372936815023, 'time_step': 0.005343512740246085, 'init_value': -18.405052185058594, 'ave_value': -19.09029415995302, 'soft_opc': nan} step=12040




2022-04-20 18:25.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.38 [info     ] FQE_20220420182426: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001813776271287785, 'time_algorithm_update': 0.005190036324567573, 'loss': 0.6290899666491896, 'time_step': 0.005451624476632407, 'init_value': -18.503692626953125, 'ave_value': -19.40283834867364, 'soft_opc': nan} step=12384




2022-04-20 18:25.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.39 [info     ] FQE_20220420182426: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001697859098744947, 'time_algorithm_update': 0.004611926023350205, 'loss': 0.6161269045131671, 'time_step': 0.0048573509205219354, 'init_value': -18.57018280029297, 'ave_value': -19.58832257592648, 'soft_opc': nan} step=12728




2022-04-20 18:25.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.41 [info     ] FQE_20220420182426: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001644506010898324, 'time_algorithm_update': 0.005100729160530623, 'loss': 0.6127331217826713, 'time_step': 0.005336053149644719, 'init_value': -18.759075164794922, 'ave_value': -19.944011878845206, 'soft_opc': nan} step=13072




2022-04-20 18:25.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.43 [info     ] FQE_20220420182426: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001576584438944972, 'time_algorithm_update': 0.004986184280972148, 'loss': 0.5947592107763211, 'time_step': 0.005211788554524266, 'init_value': -18.733810424804688, 'ave_value': -20.047482506396367, 'soft_opc': nan} step=13416




2022-04-20 18:25.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.45 [info     ] FQE_20220420182426: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015216164810712948, 'time_algorithm_update': 0.004992922378140826, 'loss': 0.5852188665723038, 'time_step': 0.00521114191343618, 'init_value': -19.16084098815918, 'ave_value': -20.55535778422244, 'soft_opc': nan} step=13760




2022-04-20 18:25.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.47 [info     ] FQE_20220420182426: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016386564387831578, 'time_algorithm_update': 0.0046263177727544035, 'loss': 0.5836480290109162, 'time_step': 0.004861521166424418, 'init_value': -19.243820190429688, 'ave_value': -20.786312175815638, 'soft_opc': nan} step=14104




2022-04-20 18:25.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.49 [info     ] FQE_20220420182426: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016465297965116279, 'time_algorithm_update': 0.005130313163579896, 'loss': 0.572905184587377, 'time_step': 0.005367127268813377, 'init_value': -19.331100463867188, 'ave_value': -21.04522066941812, 'soft_opc': nan} step=14448




2022-04-20 18:25.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.51 [info     ] FQE_20220420182426: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017833917639976325, 'time_algorithm_update': 0.005183253177376681, 'loss': 0.5626034378019963, 'time_step': 0.005440970492917438, 'init_value': -19.501850128173828, 'ave_value': -21.106415695822083, 'soft_opc': nan} step=14792




2022-04-20 18:25.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.53 [info     ] FQE_20220420182426: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00018552569455878678, 'time_algorithm_update': 0.005199547423872837, 'loss': 0.5706405427447672, 'time_step': 0.005466000978336777, 'init_value': -19.60944175720215, 'ave_value': -21.41632730914112, 'soft_opc': nan} step=15136




2022-04-20 18:25.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.55 [info     ] FQE_20220420182426: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00018037751663562863, 'time_algorithm_update': 0.00508182963659597, 'loss': 0.5674518838329891, 'time_step': 0.005343960468159165, 'init_value': -19.612775802612305, 'ave_value': -21.610090423310762, 'soft_opc': nan} step=15480




2022-04-20 18:25.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.57 [info     ] FQE_20220420182426: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017966434013011844, 'time_algorithm_update': 0.004770076552102732, 'loss': 0.5522422658138757, 'time_step': 0.0050283212994420255, 'init_value': -19.34738540649414, 'ave_value': -21.71789902468707, 'soft_opc': nan} step=15824




2022-04-20 18:25.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:25.59 [info     ] FQE_20220420182426: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001762883607731309, 'time_algorithm_update': 0.00509612851364668, 'loss': 0.5452748505117069, 'time_step': 0.005351449168005655, 'init_value': -19.190820693969727, 'ave_value': -21.819897224787656, 'soft_opc': nan} step=16168




2022-04-20 18:25.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:26.01 [info     ] FQE_20220420182426: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00018011622650678769, 'time_algorithm_update': 0.005045564368713734, 'loss': 0.5372200448574975, 'time_step': 0.005303084850311279, 'init_value': -18.92679786682129, 'ave_value': -21.819984422784785, 'soft_opc': nan} step=16512




2022-04-20 18:26.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:26.03 [info     ] FQE_20220420182426: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001756417196850444, 'time_algorithm_update': 0.004976075055987336, 'loss': 0.5250325237920638, 'time_step': 0.005229270042375077, 'init_value': -18.58458709716797, 'ave_value': -21.635065190358976, 'soft_opc': nan} step=16856




2022-04-20 18:26.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:26.05 [info     ] FQE_20220420182426: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001766910386639972, 'time_algorithm_update': 0.004559254923532176, 'loss': 0.5224152889351772, 'time_step': 0.004812742388525674, 'init_value': -18.36180877685547, 'ave_value': -21.55008462645315, 'soft_opc': nan} step=17200




2022-04-20 18:26.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182426/model_17200.pt
search iteration:  35
using hyper params:  [0.002036294432220638, 0.005914984087523811, 4.35474453998058e-05, 7]
2022-04-20 18:26.05 [debug    ] RoundIterator is selected.
2022-04-20 18:26.05 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420182605
2022-04-20 18:26.05 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:26.05 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:26.05 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:26.05 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.002036294432220

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.09 [info     ] TD3PlusBC_20220420182605: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003984804041901527, 'time_algorithm_update': 0.00896047918420089, 'critic_loss': 10.307803909506713, 'actor_loss': 2.66691944194816, 'time_step': 0.009448885220533226, 'td_error': 1.0364443485547483, 'init_value': -11.282504081726074, 'ave_value': -7.236732843904965} step=342
2022-04-20 18:26.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.13 [info     ] TD3PlusBC_20220420182605: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00040058573784186824, 'time_algorithm_update': 0.008893903933073344, 'critic_loss': 6.02633178547809, 'actor_loss': 2.5769801404741077, 'time_step': 0.00937284433353714, 'td_error': 1.3098261979869021, 'init_value': -15.935470581054688, 'ave_value': -10.278901042361694} step=684
2022-04-20 18:26.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.16 [info     ] TD3PlusBC_20220420182605: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00039820503770259387, 'time_algorithm_update': 0.008552828030279505, 'critic_loss': 9.669147706171225, 'actor_loss': 2.568157787211457, 'time_step': 0.009029453957987111, 'td_error': 1.7378648525607252, 'init_value': -21.54732894897461, 'ave_value': -13.992051125719875} step=1026
2022-04-20 18:26.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.20 [info     ] TD3PlusBC_20220420182605: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00039540117944193167, 'time_algorithm_update': 0.008904364373948839, 'critic_loss': 14.22337978485732, 'actor_loss': 2.5633305513370805, 'time_step': 0.009371954098082426, 'td_error': 2.1792386517049604, 'init_value': -25.992755889892578, 'ave_value': -16.851745512045348} step=1368
2022-04-20 18:26.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.23 [info     ] TD3PlusBC_20220420182605: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00039880596406278556, 'time_algorithm_update': 0.008424593691240278, 'critic_loss': 19.22184802077667, 'actor_loss': 2.562750108060781, 'time_step': 0.008896984552082262, 'td_error': 2.682287592687331, 'init_value': -30.436016082763672, 'ave_value': -19.929855739177782} step=1710
2022-04-20 18:26.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.27 [info     ] TD3PlusBC_20220420182605: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0004007460778219658, 'time_algorithm_update': 0.008944480739838896, 'critic_loss': 24.468592191997327, 'actor_loss': 2.5611158058657284, 'time_step': 0.009420374680680839, 'td_error': 3.212572876732415, 'init_value': -35.28862762451172, 'ave_value': -23.01046182103648} step=2052
2022-04-20 18:26.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.31 [info     ] TD3PlusBC_20220420182605: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00040223305685478346, 'time_algorithm_update': 0.008900032405964813, 'critic_loss': 29.80194617990862, 'actor_loss': 2.5604908312970434, 'time_step': 0.009378368394416674, 'td_error': 3.6944359023886495, 'init_value': -39.79926300048828, 'ave_value': -26.039979756994274} step=2394
2022-04-20 18:26.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.34 [info     ] TD3PlusBC_20220420182605: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00039518785755536707, 'time_algorithm_update': 0.008484352401822631, 'critic_loss': 35.502799056426824, 'actor_loss': 2.559154000198632, 'time_step': 0.008960210789016813, 'td_error': 4.3190434268848765, 'init_value': -43.75214385986328, 'ave_value': -28.892243271992786} step=2736
2022-04-20 18:26.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.38 [info     ] TD3PlusBC_20220420182605: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003949382848906935, 'time_algorithm_update': 0.008971795701144035, 'critic_loss': 41.54853519640471, 'actor_loss': 2.558835524564598, 'time_step': 0.00944369996500294, 'td_error': 4.826256278077956, 'init_value': -48.4116096496582, 'ave_value': -31.765620783121484} step=3078
2022-04-20 18:26.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.42 [info     ] TD3PlusBC_20220420182605: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00039269352516932795, 'time_algorithm_update': 0.008459100249217965, 'critic_loss': 47.2489709463733, 'actor_loss': 2.557478821068479, 'time_step': 0.0089293997190152, 'td_error': 5.295002271371878, 'init_value': -52.6567268371582, 'ave_value': -34.218041373983965} step=3420
2022-04-20 18:26.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.46 [info     ] TD3PlusBC_20220420182605: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00039111661632158605, 'time_algorithm_update': 0.008877844141240706, 'critic_loss': 53.57426910511931, 'actor_loss': 2.5574318010207504, 'time_step': 0.00934452620166087, 'td_error': 6.1482200469591834, 'init_value': -56.454734802246094, 'ave_value': -36.827822584637715} step=3762
2022-04-20 18:26.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.49 [info     ] TD3PlusBC_20220420182605: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00039277578655042147, 'time_algorithm_update': 0.008827119542841325, 'critic_loss': 59.8683559071948, 'actor_loss': 2.5561954863587317, 'time_step': 0.009296323820861459, 'td_error': 6.477870833871846, 'init_value': -58.71491241455078, 'ave_value': -38.55553228737803} step=4104
2022-04-20 18:26.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.53 [info     ] TD3PlusBC_20220420182605: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039654795886480323, 'time_algorithm_update': 0.008391756760446649, 'critic_loss': 65.30300486715217, 'actor_loss': 2.5560931016130057, 'time_step': 0.008864397891083656, 'td_error': 7.168886929045737, 'init_value': -61.87223434448242, 'ave_value': -40.79069636047432} step=4446
2022-04-20 18:26.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.56 [info     ] TD3PlusBC_20220420182605: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0004025502511632373, 'time_algorithm_update': 0.008809969439144022, 'critic_loss': 71.2866204691212, 'actor_loss': 2.5560015084450707, 'time_step': 0.009289627883866516, 'td_error': 8.080181013112234, 'init_value': -67.03010559082031, 'ave_value': -43.331239380592244} step=4788
2022-04-20 18:26.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.00 [info     ] TD3PlusBC_20220420182605: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003977979135792158, 'time_algorithm_update': 0.008710867480227822, 'critic_loss': 77.36760678207665, 'actor_loss': 2.5567850606483327, 'time_step': 0.009183507913734481, 'td_error': 8.399765673198822, 'init_value': -68.34199523925781, 'ave_value': -45.08844585661306} step=5130
2022-04-20 18:27.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.04 [info     ] TD3PlusBC_20220420182605: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003926503030877364, 'time_algorithm_update': 0.008987937057227419, 'critic_loss': 83.19350591737624, 'actor_loss': 2.5570358393485084, 'time_step': 0.009458100586606745, 'td_error': 8.617863426670338, 'init_value': -68.50955963134766, 'ave_value': -46.3299665809953} step=5472
2022-04-20 18:27.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.07 [info     ] TD3PlusBC_20220420182605: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003971579479195221, 'time_algorithm_update': 0.008852076809308683, 'critic_loss': 89.31292103326808, 'actor_loss': 2.55642226843806, 'time_step': 0.009326688727440193, 'td_error': 9.724643830663629, 'init_value': -73.18860626220703, 'ave_value': -48.61189053193699} step=5814
2022-04-20 18:27.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.11 [info     ] TD3PlusBC_20220420182605: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00040184963516324586, 'time_algorithm_update': 0.00863963330698292, 'critic_loss': 94.9485642505668, 'actor_loss': 2.5571655348727576, 'time_step': 0.009119156508417854, 'td_error': 9.47661952413449, 'init_value': -72.83573913574219, 'ave_value': -49.48077043878997} step=6156
2022-04-20 18:27.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.15 [info     ] TD3PlusBC_20220420182605: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00039527011893646064, 'time_algorithm_update': 0.00887037996660199, 'critic_loss': 100.33777078550462, 'actor_loss': 2.558507857964053, 'time_step': 0.009337957839519656, 'td_error': 10.042324397548384, 'init_value': -75.97804260253906, 'ave_value': -51.23371658522166} step=6498
2022-04-20 18:27.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.18 [info     ] TD3PlusBC_20220420182605: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00039451024685686794, 'time_algorithm_update': 0.008717963570042661, 'critic_loss': 106.51999553881194, 'actor_loss': 2.5577907353116753, 'time_step': 0.009174310673050017, 'td_error': 10.976692674695443, 'init_value': -77.74019622802734, 'ave_value': -52.85200391147177} step=6840
2022-04-20 18:27.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.22 [info     ] TD3PlusBC_20220420182605: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00039337113586782713, 'time_algorithm_update': 0.008900913578725, 'critic_loss': 111.74863487377502, 'actor_loss': 2.5590567477265296, 'time_step': 0.00935910389437313, 'td_error': 11.319749636605325, 'init_value': -78.39344787597656, 'ave_value': -53.95714512447214} step=7182
2022-04-20 18:27.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.26 [info     ] TD3PlusBC_20220420182605: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.000396697841889677, 'time_algorithm_update': 0.008791750634622853, 'critic_loss': 116.85223569368061, 'actor_loss': 2.560172614995499, 'time_step': 0.00925107308995654, 'td_error': 11.884216693480852, 'init_value': -78.96002197265625, 'ave_value': -55.08365992362771} step=7524
2022-04-20 18:27.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.29 [info     ] TD3PlusBC_20220420182605: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00039724369495235687, 'time_algorithm_update': 0.008465450409560175, 'critic_loss': 122.25217464513946, 'actor_loss': 2.560214377286141, 'time_step': 0.008928980743675902, 'td_error': 11.740149780995734, 'init_value': -80.61974334716797, 'ave_value': -56.20606038450054} step=7866
2022-04-20 18:27.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.33 [info     ] TD3PlusBC_20220420182605: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003926942222996762, 'time_algorithm_update': 0.008870082291943289, 'critic_loss': 126.6971297905459, 'actor_loss': 2.560525843971654, 'time_step': 0.009326030636391444, 'td_error': 11.800760364632577, 'init_value': -79.21202087402344, 'ave_value': -56.6201208541153} step=8208
2022-04-20 18:27.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.37 [info     ] TD3PlusBC_20220420182605: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003955677935951634, 'time_algorithm_update': 0.008876498679668582, 'critic_loss': 130.2503074512147, 'actor_loss': 2.5603512337333276, 'time_step': 0.009338819492630095, 'td_error': 12.668668997551961, 'init_value': -81.17747497558594, 'ave_value': -58.14800145534832} step=8550
2022-04-20 18:27.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.40 [info     ] TD3PlusBC_20220420182605: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.000398620527390151, 'time_algorithm_update': 0.008705667584960223, 'critic_loss': 133.9117039686058, 'actor_loss': 2.5602761918341206, 'time_step': 0.009166944793790405, 'td_error': 12.617532958149738, 'init_value': -83.41556549072266, 'ave_value': -59.371808474781915} step=8892
2022-04-20 18:27.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.44 [info     ] TD3PlusBC_20220420182605: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003893368425425033, 'time_algorithm_update': 0.008829480723330849, 'critic_loss': 137.52735642104122, 'actor_loss': 2.560203391906114, 'time_step': 0.00928019640738504, 'td_error': 12.802489060547341, 'init_value': -82.86795043945312, 'ave_value': -59.83362465307419} step=9234
2022-04-20 18:27.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.48 [info     ] TD3PlusBC_20220420182605: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00039648173148171945, 'time_algorithm_update': 0.0084552416327404, 'critic_loss': 140.3542440314042, 'actor_loss': 2.56030906013578, 'time_step': 0.008912709024217393, 'td_error': 12.914163607425248, 'init_value': -84.28878784179688, 'ave_value': -61.243002356683654} step=9576
2022-04-20 18:27.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.51 [info     ] TD3PlusBC_20220420182605: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0004008046367712188, 'time_algorithm_update': 0.00901430670978033, 'critic_loss': 143.5735291598136, 'actor_loss': 2.5616660452725593, 'time_step': 0.009477609082272178, 'td_error': 13.729364593436392, 'init_value': -84.04723358154297, 'ave_value': -61.80493183490225} step=9918
2022-04-20 18:27.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.55 [info     ] TD3PlusBC_20220420182605: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00040213964138811794, 'time_algorithm_update': 0.008929965788857979, 'critic_loss': 146.27471604821278, 'actor_loss': 2.561474461304514, 'time_step': 0.009396249787849292, 'td_error': 13.903022508304208, 'init_value': -82.02273559570312, 'ave_value': -61.87817968429578} step=10260
2022-04-20 18:27.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.59 [info     ] TD3PlusBC_20220420182605: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003968254167434068, 'time_algorithm_update': 0.008634372761375025, 'critic_loss': 148.57072995280663, 'actor_loss': 2.5624454858010277, 'time_step': 0.009100259396067838, 'td_error': 14.314990227476141, 'init_value': -85.01683044433594, 'ave_value': -63.33651871419503} step=10602
2022-04-20 18:27.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.03 [info     ] TD3PlusBC_20220420182605: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.000392740930033009, 'time_algorithm_update': 0.009137252617997733, 'critic_loss': 150.93784468355236, 'actor_loss': 2.5619236126280667, 'time_step': 0.009595022564045867, 'td_error': 14.402343130802134, 'init_value': -85.55589294433594, 'ave_value': -63.75995023515428} step=10944
2022-04-20 18:28.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.06 [info     ] TD3PlusBC_20220420182605: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00040842636286863806, 'time_algorithm_update': 0.008680044559010287, 'critic_loss': 153.72567173472623, 'actor_loss': 2.5623052329347846, 'time_step': 0.009154791023299011, 'td_error': 13.750796581742136, 'init_value': -81.77702331542969, 'ave_value': -63.63545229463519} step=11286
2022-04-20 18:28.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.10 [info     ] TD3PlusBC_20220420182605: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0004019737243652344, 'time_algorithm_update': 0.009053055305927121, 'critic_loss': 155.74158783405147, 'actor_loss': 2.5622872664914493, 'time_step': 0.009521931235553229, 'td_error': 14.850992296170155, 'init_value': -84.38314819335938, 'ave_value': -64.76298580489633} step=11628
2022-04-20 18:28.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.14 [info     ] TD3PlusBC_20220420182605: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00040623040227165, 'time_algorithm_update': 0.009019959739774291, 'critic_loss': 156.80495423880237, 'actor_loss': 2.5639316826535943, 'time_step': 0.009485486655207405, 'td_error': 14.727930845118546, 'init_value': -85.40650939941406, 'ave_value': -65.33275114863288} step=11970
2022-04-20 18:28.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.17 [info     ] TD3PlusBC_20220420182605: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00039832424699214465, 'time_algorithm_update': 0.008665966708757723, 'critic_loss': 158.86409096968802, 'actor_loss': 2.5633096123299404, 'time_step': 0.009129994096811752, 'td_error': 15.178772708800794, 'init_value': -84.71214294433594, 'ave_value': -65.91540886550924} step=12312
2022-04-20 18:28.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.21 [info     ] TD3PlusBC_20220420182605: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0004030466079711914, 'time_algorithm_update': 0.009030823819121422, 'critic_loss': 160.30765972918238, 'actor_loss': 2.5642050553483573, 'time_step': 0.009502422739887795, 'td_error': 16.137532791923448, 'init_value': -90.12750244140625, 'ave_value': -66.99308626295931} step=12654
2022-04-20 18:28.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.25 [info     ] TD3PlusBC_20220420182605: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00039735453867772866, 'time_algorithm_update': 0.008685910910890814, 'critic_loss': 161.55543758994654, 'actor_loss': 2.5636822591748154, 'time_step': 0.009147999579446358, 'td_error': 15.945628167949737, 'init_value': -83.88838958740234, 'ave_value': -66.49472211364505} step=12996
2022-04-20 18:28.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.28 [info     ] TD3PlusBC_20220420182605: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0004013741922657392, 'time_algorithm_update': 0.009002089500427246, 'critic_loss': 162.95178028575162, 'actor_loss': 2.5639953390199537, 'time_step': 0.009463944630316126, 'td_error': 14.948037404330897, 'init_value': -84.80398559570312, 'ave_value': -66.96578901367481} step=13338
2022-04-20 18:28.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.32 [info     ] TD3PlusBC_20220420182605: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0004057354397243924, 'time_algorithm_update': 0.009030272389015956, 'critic_loss': 164.1350162573028, 'actor_loss': 2.5638088078526726, 'time_step': 0.009508621622944436, 'td_error': 16.29207364879185, 'init_value': -85.58362579345703, 'ave_value': -67.54648029027808} step=13680
2022-04-20 18:28.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.36 [info     ] TD3PlusBC_20220420182605: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00040236272309955795, 'time_algorithm_update': 0.00867334234784221, 'critic_loss': 165.92651362725866, 'actor_loss': 2.5645305943070795, 'time_step': 0.009138967558654428, 'td_error': 16.449141785698604, 'init_value': -85.21116638183594, 'ave_value': -68.30218183690867} step=14022
2022-04-20 18:28.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.40 [info     ] TD3PlusBC_20220420182605: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003982259516130414, 'time_algorithm_update': 0.009025787052355315, 'critic_loss': 166.91394384283768, 'actor_loss': 2.564189749154431, 'time_step': 0.009485694400051184, 'td_error': 16.083351271308523, 'init_value': -85.67240905761719, 'ave_value': -67.88474367545557} step=14364
2022-04-20 18:28.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.43 [info     ] TD3PlusBC_20220420182605: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00040285838277716383, 'time_algorithm_update': 0.008953578988013909, 'critic_loss': 167.9360684623495, 'actor_loss': 2.5640119376935457, 'time_step': 0.009420148810448005, 'td_error': 16.198321475216662, 'init_value': -84.18157958984375, 'ave_value': -68.30304742215385} step=14706
2022-04-20 18:28.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.47 [info     ] TD3PlusBC_20220420182605: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0004018196585582711, 'time_algorithm_update': 0.009012186736391302, 'critic_loss': 168.59091699611375, 'actor_loss': 2.564913129248814, 'time_step': 0.009475307157862257, 'td_error': 16.241552817993373, 'init_value': -84.7873306274414, 'ave_value': -68.88050250683298} step=15048
2022-04-20 18:28.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.51 [info     ] TD3PlusBC_20220420182605: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00040884533820793645, 'time_algorithm_update': 0.008883220410486411, 'critic_loss': 169.94124908893428, 'actor_loss': 2.56523016740007, 'time_step': 0.009359989947045755, 'td_error': 17.228653445335556, 'init_value': -86.2504653930664, 'ave_value': -69.43174937731948} step=15390
2022-04-20 18:28.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.54 [info     ] TD3PlusBC_20220420182605: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00040228185597916097, 'time_algorithm_update': 0.008505744543689036, 'critic_loss': 170.52405021622863, 'actor_loss': 2.565639342480933, 'time_step': 0.00897675299505044, 'td_error': 16.84358060609955, 'init_value': -86.85820770263672, 'ave_value': -69.66619831265461} step=15732
2022-04-20 18:28.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.58 [info     ] TD3PlusBC_20220420182605: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0004007649003413686, 'time_algorithm_update': 0.008998704235456144, 'critic_loss': 171.34332485087432, 'actor_loss': 2.565074598580076, 'time_step': 0.009464090330558911, 'td_error': 16.62497373279663, 'init_value': -83.64738464355469, 'ave_value': -69.30682484804292} step=16074
2022-04-20 18:28.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.02 [info     ] TD3PlusBC_20220420182605: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00040912628173828125, 'time_algorithm_update': 0.009093992194237068, 'critic_loss': 171.82004295036808, 'actor_loss': 2.56545623143514, 'time_step': 0.00956852394237853, 'td_error': 17.777980244159856, 'init_value': -85.60652923583984, 'ave_value': -70.33719991611183} step=16416
2022-04-20 18:29.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.06 [info     ] TD3PlusBC_20220420182605: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0004036928478040193, 'time_algorithm_update': 0.009056552808884292, 'critic_loss': 172.3939553868701, 'actor_loss': 2.5655968217124716, 'time_step': 0.009525753601252684, 'td_error': 16.06675060834038, 'init_value': -82.11299896240234, 'ave_value': -69.57290724188894} step=16758
2022-04-20 18:29.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.09 [info     ] TD3PlusBC_20220420182605: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00039788784339414004, 'time_algorithm_update': 0.008977739434493216, 'critic_loss': 172.96328849123236, 'actor_loss': 2.5660033226013184, 'time_step': 0.009443557750411898, 'td_error': 15.99970543919232, 'init_value': -81.7270736694336, 'ave_value': -69.38771291561667} step=17100
2022-04-20 18:29.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420182605/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:29.11 [info     ] FQE_20220420182910: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.0001627927446095957, 'time_algorithm_update': 0.004917503076758089, 'loss': 0.007937328571632589, 'time_step': 0.0051571056667694265, 'init_value': -0.46057501435279846, 'ave_value': -0.4189481137035129, 'soft_opc': nan} step=177




2022-04-20 18:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.11 [info     ] FQE_20220420182910: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015847292323570468, 'time_algorithm_update': 0.004212934418586688, 'loss': 0.005902040883961876, 'time_step': 0.004440061116622666, 'init_value': -0.612031877040863, 'ave_value': -0.5134517099674758, 'soft_opc': nan} step=354




2022-04-20 18:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.12 [info     ] FQE_20220420182910: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.0001642313380699373, 'time_algorithm_update': 0.005165314270278155, 'loss': 0.005377498133065926, 'time_step': 0.005399772676370911, 'init_value': -0.645610511302948, 'ave_value': -0.5070339634835541, 'soft_opc': nan} step=531




2022-04-20 18:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.13 [info     ] FQE_20220420182910: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016496141078108448, 'time_algorithm_update': 0.004971474577478097, 'loss': 0.005097124703217951, 'time_step': 0.00520960355209092, 'init_value': -0.6805098652839661, 'ave_value': -0.5202170978661056, 'soft_opc': nan} step=708




2022-04-20 18:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.14 [info     ] FQE_20220420182910: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.000164788995085463, 'time_algorithm_update': 0.005153622330918824, 'loss': 0.004899442762841528, 'time_step': 0.005392163486803992, 'init_value': -0.7282276749610901, 'ave_value': -0.5435156809227603, 'soft_opc': nan} step=885




2022-04-20 18:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.15 [info     ] FQE_20220420182910: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001659460660427977, 'time_algorithm_update': 0.0051250255714028565, 'loss': 0.004697725509614342, 'time_step': 0.005366981366259903, 'init_value': -0.7671175599098206, 'ave_value': -0.5530580855659895, 'soft_opc': nan} step=1062




2022-04-20 18:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.16 [info     ] FQE_20220420182910: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00016358882020422295, 'time_algorithm_update': 0.0050513784764176705, 'loss': 0.004633431773406511, 'time_step': 0.005287131347225211, 'init_value': -0.8012102246284485, 'ave_value': -0.5672966455956837, 'soft_opc': nan} step=1239




2022-04-20 18:29.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.17 [info     ] FQE_20220420182910: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016338677056091654, 'time_algorithm_update': 0.005042928760334597, 'loss': 0.0043752660782807795, 'time_step': 0.005279283739079189, 'init_value': -0.8381879925727844, 'ave_value': -0.5844351095435497, 'soft_opc': nan} step=1416




2022-04-20 18:29.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.18 [info     ] FQE_20220420182910: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016752070626296567, 'time_algorithm_update': 0.0050462410274872, 'loss': 0.004319392375537526, 'time_step': 0.005289332341339628, 'init_value': -0.8804630637168884, 'ave_value': -0.6127949765673629, 'soft_opc': nan} step=1593




2022-04-20 18:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.20 [info     ] FQE_20220420182910: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001724749635168388, 'time_algorithm_update': 0.005042263343509308, 'loss': 0.004584509042738858, 'time_step': 0.005291914535781084, 'init_value': -0.8974582552909851, 'ave_value': -0.6075050421036757, 'soft_opc': nan} step=1770




2022-04-20 18:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.20 [info     ] FQE_20220420182910: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00016409798530535508, 'time_algorithm_update': 0.004241325087466483, 'loss': 0.0045895909694533814, 'time_step': 0.004481498804469567, 'init_value': -0.9352334141731262, 'ave_value': -0.6323655161279458, 'soft_opc': nan} step=1947




2022-04-20 18:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.21 [info     ] FQE_20220420182910: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016930278411692818, 'time_algorithm_update': 0.005049157277338922, 'loss': 0.004757837550898772, 'time_step': 0.005291154829122253, 'init_value': -0.9946195483207703, 'ave_value': -0.6590309172465995, 'soft_opc': nan} step=2124




2022-04-20 18:29.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.22 [info     ] FQE_20220420182910: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016444146969897598, 'time_algorithm_update': 0.005044964073741503, 'loss': 0.004727090316256351, 'time_step': 0.005284259548295016, 'init_value': -0.9786067008972168, 'ave_value': -0.6371513523899757, 'soft_opc': nan} step=2301




2022-04-20 18:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.23 [info     ] FQE_20220420182910: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016460984440173134, 'time_algorithm_update': 0.005045112243479928, 'loss': 0.005081335124086147, 'time_step': 0.00528553515504309, 'init_value': -1.059051513671875, 'ave_value': -0.6824502940739955, 'soft_opc': nan} step=2478




2022-04-20 18:29.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.24 [info     ] FQE_20220420182910: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016621007757671808, 'time_algorithm_update': 0.005140359792332191, 'loss': 0.005716406560692368, 'time_step': 0.005382544576784986, 'init_value': -1.0702942609786987, 'ave_value': -0.6923683510752053, 'soft_opc': nan} step=2655




2022-04-20 18:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.25 [info     ] FQE_20220420182910: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016965300349865929, 'time_algorithm_update': 0.004970049453993975, 'loss': 0.0058531885562822395, 'time_step': 0.005217150779767225, 'init_value': -1.145671010017395, 'ave_value': -0.7351039987739858, 'soft_opc': nan} step=2832




2022-04-20 18:29.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.26 [info     ] FQE_20220420182910: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001696907194320765, 'time_algorithm_update': 0.005018218088958223, 'loss': 0.006222460992465922, 'time_step': 0.005264788697668388, 'init_value': -1.1899107694625854, 'ave_value': -0.7684810194077792, 'soft_opc': nan} step=3009




2022-04-20 18:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.27 [info     ] FQE_20220420182910: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016937148099565235, 'time_algorithm_update': 0.0050109160148491295, 'loss': 0.007147637077496718, 'time_step': 0.0052591919225488, 'init_value': -1.2431691884994507, 'ave_value': -0.7944093749687836, 'soft_opc': nan} step=3186




2022-04-20 18:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.28 [info     ] FQE_20220420182910: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00017152128920043256, 'time_algorithm_update': 0.005052509954420186, 'loss': 0.007641945049392933, 'time_step': 0.0052995210313527595, 'init_value': -1.329149842262268, 'ave_value': -0.8474695753227841, 'soft_opc': nan} step=3363




2022-04-20 18:29.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.29 [info     ] FQE_20220420182910: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00016896334071617343, 'time_algorithm_update': 0.004223740033510714, 'loss': 0.007976988480442156, 'time_step': 0.004469862092012739, 'init_value': -1.4052785634994507, 'ave_value': -0.9213953065353113, 'soft_opc': nan} step=3540




2022-04-20 18:29.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.30 [info     ] FQE_20220420182910: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00017347982374288267, 'time_algorithm_update': 0.0051674236685542736, 'loss': 0.008913932733848101, 'time_step': 0.005417873630415922, 'init_value': -1.5524519681930542, 'ave_value': -1.0190057078057582, 'soft_opc': nan} step=3717




2022-04-20 18:29.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.31 [info     ] FQE_20220420182910: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016465833631612486, 'time_algorithm_update': 0.005104591617476469, 'loss': 0.009499627051711881, 'time_step': 0.005343850723094186, 'init_value': -1.5595546960830688, 'ave_value': -1.0100265403633362, 'soft_opc': nan} step=3894




2022-04-20 18:29.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.32 [info     ] FQE_20220420182910: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016393230459784384, 'time_algorithm_update': 0.005033833832390564, 'loss': 0.009821767370890046, 'time_step': 0.005272529893002267, 'init_value': -1.6971746683120728, 'ave_value': -1.0925965864871356, 'soft_opc': nan} step=4071




2022-04-20 18:29.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.33 [info     ] FQE_20220420182910: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001673833125055173, 'time_algorithm_update': 0.0050609259955627095, 'loss': 0.010982899433975292, 'time_step': 0.005301122611525369, 'init_value': -1.7729054689407349, 'ave_value': -1.1518057172720855, 'soft_opc': nan} step=4248




2022-04-20 18:29.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.34 [info     ] FQE_20220420182910: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016679871553755077, 'time_algorithm_update': 0.00502651155331714, 'loss': 0.01151994822052239, 'time_step': 0.005267155372490317, 'init_value': -1.8709889650344849, 'ave_value': -1.206498405578974, 'soft_opc': nan} step=4425




2022-04-20 18:29.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.35 [info     ] FQE_20220420182910: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00016944691286248676, 'time_algorithm_update': 0.005149315979521153, 'loss': 0.01237322759188417, 'time_step': 0.005395545797832942, 'init_value': -1.9691081047058105, 'ave_value': -1.2512345226140351, 'soft_opc': nan} step=4602




2022-04-20 18:29.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.36 [info     ] FQE_20220420182910: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00016865083726785952, 'time_algorithm_update': 0.005062764647316798, 'loss': 0.01320979941707357, 'time_step': 0.005305579826656708, 'init_value': -2.0448005199432373, 'ave_value': -1.2851123947370517, 'soft_opc': nan} step=4779




2022-04-20 18:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.37 [info     ] FQE_20220420182910: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016879900700628422, 'time_algorithm_update': 0.005057829248029633, 'loss': 0.013963680431551937, 'time_step': 0.0053034825513591875, 'init_value': -2.1909255981445312, 'ave_value': -1.4023914491808092, 'soft_opc': nan} step=4956




2022-04-20 18:29.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.38 [info     ] FQE_20220420182910: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016588410415218374, 'time_algorithm_update': 0.00420227966739633, 'loss': 0.015086752182335756, 'time_step': 0.004438508028364451, 'init_value': -2.293992757797241, 'ave_value': -1.4473518855578906, 'soft_opc': nan} step=5133




2022-04-20 18:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.39 [info     ] FQE_20220420182910: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016613733970512778, 'time_algorithm_update': 0.0050770414751128284, 'loss': 0.016808004912362555, 'time_step': 0.005315978648298878, 'init_value': -2.234940528869629, 'ave_value': -1.3902716424730088, 'soft_opc': nan} step=5310




2022-04-20 18:29.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.40 [info     ] FQE_20220420182910: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001690832235045352, 'time_algorithm_update': 0.005095454932606153, 'loss': 0.016653459260008684, 'time_step': 0.005338055939324158, 'init_value': -2.3837640285491943, 'ave_value': -1.4967040878247928, 'soft_opc': nan} step=5487




2022-04-20 18:29.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.41 [info     ] FQE_20220420182910: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016585177620925473, 'time_algorithm_update': 0.005124704985968811, 'loss': 0.01735858216297702, 'time_step': 0.0053681249672410175, 'init_value': -2.4504029750823975, 'ave_value': -1.4957450643539787, 'soft_opc': nan} step=5664




2022-04-20 18:29.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.42 [info     ] FQE_20220420182910: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00018105129737638485, 'time_algorithm_update': 0.005011865648172669, 'loss': 0.01805018750808628, 'time_step': 0.005267474610926741, 'init_value': -2.613450050354004, 'ave_value': -1.602346595387917, 'soft_opc': nan} step=5841




2022-04-20 18:29.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.43 [info     ] FQE_20220420182910: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00016604439686920683, 'time_algorithm_update': 0.005039511427367475, 'loss': 0.01913660787911507, 'time_step': 0.005282410120559951, 'init_value': -2.7489521503448486, 'ave_value': -1.7124615876166311, 'soft_opc': nan} step=6018




2022-04-20 18:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.44 [info     ] FQE_20220420182910: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016715836390263618, 'time_algorithm_update': 0.00511616502104506, 'loss': 0.02059801189507763, 'time_step': 0.005358378092447917, 'init_value': -2.728870153427124, 'ave_value': -1.660601580671004, 'soft_opc': nan} step=6195




2022-04-20 18:29.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.45 [info     ] FQE_20220420182910: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00017069962065098648, 'time_algorithm_update': 0.005136011684008237, 'loss': 0.020761962582115188, 'time_step': 0.005382436816975222, 'init_value': -2.8803980350494385, 'ave_value': -1.7245095311610907, 'soft_opc': nan} step=6372




2022-04-20 18:29.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.46 [info     ] FQE_20220420182910: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016586928717834127, 'time_algorithm_update': 0.0050019961965959625, 'loss': 0.020419149565520975, 'time_step': 0.005244528506435244, 'init_value': -3.0393221378326416, 'ave_value': -1.839916509022942, 'soft_opc': nan} step=6549




2022-04-20 18:29.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.47 [info     ] FQE_20220420182910: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016307292011498057, 'time_algorithm_update': 0.0041652008638543596, 'loss': 0.02208288796351286, 'time_step': 0.004402994436059294, 'init_value': -3.1688263416290283, 'ave_value': -1.9244085840992742, 'soft_opc': nan} step=6726




2022-04-20 18:29.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.48 [info     ] FQE_20220420182910: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016880170100152828, 'time_algorithm_update': 0.005172268819000761, 'loss': 0.02302838963493284, 'time_step': 0.005413905375421384, 'init_value': -3.3107783794403076, 'ave_value': -2.034954778210179, 'soft_opc': nan} step=6903




2022-04-20 18:29.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.49 [info     ] FQE_20220420182910: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016549078084654727, 'time_algorithm_update': 0.005181845972093486, 'loss': 0.021333351489919432, 'time_step': 0.0054220506700418765, 'init_value': -3.3691577911376953, 'ave_value': -2.011635441259221, 'soft_opc': nan} step=7080




2022-04-20 18:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.50 [info     ] FQE_20220420182910: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.0001682224920240499, 'time_algorithm_update': 0.005085850839560988, 'loss': 0.025336445659125423, 'time_step': 0.005326564702610511, 'init_value': -3.3718013763427734, 'ave_value': -2.0351584094691204, 'soft_opc': nan} step=7257




2022-04-20 18:29.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.51 [info     ] FQE_20220420182910: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016675022362315722, 'time_algorithm_update': 0.005124019364179191, 'loss': 0.0257414096655945, 'time_step': 0.00536940596197958, 'init_value': -3.3467819690704346, 'ave_value': -1.97316724111189, 'soft_opc': nan} step=7434




2022-04-20 18:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.52 [info     ] FQE_20220420182910: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016674887662553517, 'time_algorithm_update': 0.005008428110241217, 'loss': 0.025733554291165313, 'time_step': 0.005252310111697784, 'init_value': -3.532447576522827, 'ave_value': -2.0900249356174614, 'soft_opc': nan} step=7611




2022-04-20 18:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.53 [info     ] FQE_20220420182910: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00017061880079366392, 'time_algorithm_update': 0.005112431143636758, 'loss': 0.02640683441962205, 'time_step': 0.005360172293280478, 'init_value': -3.509378433227539, 'ave_value': -2.0182981515431906, 'soft_opc': nan} step=7788




2022-04-20 18:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.54 [info     ] FQE_20220420182910: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016541534897971289, 'time_algorithm_update': 0.004979365289548022, 'loss': 0.027368589860813346, 'time_step': 0.005219416429767501, 'init_value': -3.591639757156372, 'ave_value': -2.081986162415496, 'soft_opc': nan} step=7965




2022-04-20 18:29.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.55 [info     ] FQE_20220420182910: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016930143711930615, 'time_algorithm_update': 0.005024997527989964, 'loss': 0.02748692620119427, 'time_step': 0.005269426410481081, 'init_value': -3.7065484523773193, 'ave_value': -2.169317121718739, 'soft_opc': nan} step=8142




2022-04-20 18:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.56 [info     ] FQE_20220420182910: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.0001666963437182755, 'time_algorithm_update': 0.004520560388511183, 'loss': 0.028571834137905285, 'time_step': 0.004761515364135053, 'init_value': -3.8094663619995117, 'ave_value': -2.232588069369127, 'soft_opc': nan} step=8319




2022-04-20 18:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.57 [info     ] FQE_20220420182910: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001726891361387436, 'time_algorithm_update': 0.005106802040574241, 'loss': 0.029290019589776204, 'time_step': 0.005353578739920578, 'init_value': -3.809300661087036, 'ave_value': -2.2128157839671267, 'soft_opc': nan} step=8496




2022-04-20 18:29.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.58 [info     ] FQE_20220420182910: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00017190922451558086, 'time_algorithm_update': 0.005155234687072409, 'loss': 0.029552883406085525, 'time_step': 0.005404862980384611, 'init_value': -3.7988321781158447, 'ave_value': -2.1758376313669903, 'soft_opc': nan} step=8673




2022-04-20 18:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:29.59 [info     ] FQE_20220420182910: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016814032516910532, 'time_algorithm_update': 0.005133950777646512, 'loss': 0.028407850726323444, 'time_step': 0.005374520511950477, 'init_value': -3.932847023010254, 'ave_value': -2.2282133239078092, 'soft_opc': nan} step=8850




2022-04-20 18:29.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182910/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:30.00 [info     ] Directory is created at d3rlpy_logs/FQE_20220420183000
2022-04-20 18:30.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:30.00 [debug    ] Building models...
2022-04-20 18:30.00 [debug    ] Models have been built.
2022-04-20 18:30.00 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420183000/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:30.02 [info     ] FQE_20220420183000: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016576398250668547, 'time_algorithm_update': 0.005032139462093974, 'loss': 0.03009903030253427, 'time_step': 0.005273319954095885, 'init_value': -1.476562738418579, 'ave_value': -1.457188087513855, 'soft_opc': nan} step=344




2022-04-20 18:30.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.04 [info     ] FQE_20220420183000: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001728714898575184, 'time_algorithm_update': 0.005021211019782133, 'loss': 0.02582587086498131, 'time_step': 0.005269189906674762, 'init_value': -2.328310012817383, 'ave_value': -2.2828434031691636, 'soft_opc': nan} step=688




2022-04-20 18:30.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.06 [info     ] FQE_20220420183000: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017065087030100268, 'time_algorithm_update': 0.004728682512460753, 'loss': 0.029188856975240415, 'time_step': 0.004979918862498084, 'init_value': -3.4344825744628906, 'ave_value': -3.337392247944802, 'soft_opc': nan} step=1032




2022-04-20 18:30.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.08 [info     ] FQE_20220420183000: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017064116721929505, 'time_algorithm_update': 0.005063687646111777, 'loss': 0.03358200165919613, 'time_step': 0.005312497532644937, 'init_value': -4.275895118713379, 'ave_value': -4.106468211020435, 'soft_opc': nan} step=1376




2022-04-20 18:30.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.10 [info     ] FQE_20220420183000: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017129196677097055, 'time_algorithm_update': 0.0051276316476422685, 'loss': 0.04254446219284694, 'time_step': 0.005373006643250931, 'init_value': -5.311366081237793, 'ave_value': -5.06794542934712, 'soft_opc': nan} step=1720




2022-04-20 18:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.11 [info     ] FQE_20220420183000: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017348001169603924, 'time_algorithm_update': 0.0050270501957383265, 'loss': 0.05181656331148778, 'time_step': 0.005279848741930585, 'init_value': -5.971861839294434, 'ave_value': -5.68885046334954, 'soft_opc': nan} step=2064




2022-04-20 18:30.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.13 [info     ] FQE_20220420183000: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017047067021214686, 'time_algorithm_update': 0.005058279564214307, 'loss': 0.06333193469651822, 'time_step': 0.005305843297825303, 'init_value': -6.737141132354736, 'ave_value': -6.438968718897652, 'soft_opc': nan} step=2408




2022-04-20 18:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.15 [info     ] FQE_20220420183000: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017329218775727028, 'time_algorithm_update': 0.004805548246516738, 'loss': 0.07286567722587999, 'time_step': 0.005059502845586732, 'init_value': -7.226952075958252, 'ave_value': -6.964141910966184, 'soft_opc': nan} step=2752




2022-04-20 18:30.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.17 [info     ] FQE_20220420183000: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017331575238427451, 'time_algorithm_update': 0.005118112924487092, 'loss': 0.0841360509498478, 'time_step': 0.00536987116170484, 'init_value': -7.831671714782715, 'ave_value': -7.634191041480045, 'soft_opc': nan} step=3096




2022-04-20 18:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.19 [info     ] FQE_20220420183000: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016730469326640284, 'time_algorithm_update': 0.005007987105569174, 'loss': 0.09723697069404257, 'time_step': 0.005252376545307248, 'init_value': -8.498246192932129, 'ave_value': -8.43113109016296, 'soft_opc': nan} step=3440




2022-04-20 18:30.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.21 [info     ] FQE_20220420183000: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017417031665181005, 'time_algorithm_update': 0.005098847455756609, 'loss': 0.1098141381544126, 'time_step': 0.005351959965949835, 'init_value': -8.804047584533691, 'ave_value': -8.90227578834102, 'soft_opc': nan} step=3784




2022-04-20 18:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.23 [info     ] FQE_20220420183000: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016886550326680028, 'time_algorithm_update': 0.004615644382876019, 'loss': 0.12313963297949454, 'time_step': 0.004859006682107615, 'init_value': -9.177989959716797, 'ave_value': -9.478578912186109, 'soft_opc': nan} step=4128




2022-04-20 18:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.25 [info     ] FQE_20220420183000: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017235306806342546, 'time_algorithm_update': 0.005109759957291359, 'loss': 0.13547202730876243, 'time_step': 0.005357246759325959, 'init_value': -9.398981094360352, 'ave_value': -9.956876813186131, 'soft_opc': nan} step=4472




2022-04-20 18:30.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.27 [info     ] FQE_20220420183000: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017574637435203375, 'time_algorithm_update': 0.005041021247242772, 'loss': 0.1528126941289926, 'time_step': 0.005295201789501102, 'init_value': -9.85204792022705, 'ave_value': -10.693037073345653, 'soft_opc': nan} step=4816




2022-04-20 18:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.29 [info     ] FQE_20220420183000: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016960363055384435, 'time_algorithm_update': 0.005004520333090494, 'loss': 0.1606815579498923, 'time_step': 0.005249119082162547, 'init_value': -9.871283531188965, 'ave_value': -10.906624130089558, 'soft_opc': nan} step=5160




2022-04-20 18:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.31 [info     ] FQE_20220420183000: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017363941946695017, 'time_algorithm_update': 0.0050701464331427285, 'loss': 0.16820917621818046, 'time_step': 0.005320173363352931, 'init_value': -10.329133987426758, 'ave_value': -11.656804490261838, 'soft_opc': nan} step=5504




2022-04-20 18:30.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.33 [info     ] FQE_20220420183000: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017002710076265557, 'time_algorithm_update': 0.004699298808741015, 'loss': 0.17932312198113218, 'time_step': 0.0049452067807663315, 'init_value': -10.263358116149902, 'ave_value': -11.806203151603338, 'soft_opc': nan} step=5848




2022-04-20 18:30.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.35 [info     ] FQE_20220420183000: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017598895139472428, 'time_algorithm_update': 0.0051506140897440355, 'loss': 0.18897136670623926, 'time_step': 0.0054035519444665244, 'init_value': -10.649349212646484, 'ave_value': -12.404822599012741, 'soft_opc': nan} step=6192




2022-04-20 18:30.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.37 [info     ] FQE_20220420183000: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017679361409919206, 'time_algorithm_update': 0.005277474259221276, 'loss': 0.20469766341929518, 'time_step': 0.005536509807719741, 'init_value': -10.827735900878906, 'ave_value': -12.783537458604084, 'soft_opc': nan} step=6536




2022-04-20 18:30.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.39 [info     ] FQE_20220420183000: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017278000365856082, 'time_algorithm_update': 0.005262165568595709, 'loss': 0.21283012762839018, 'time_step': 0.005514214898264686, 'init_value': -11.02021598815918, 'ave_value': -13.148747437223289, 'soft_opc': nan} step=6880




2022-04-20 18:30.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.41 [info     ] FQE_20220420183000: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017078602036764456, 'time_algorithm_update': 0.005202158939006717, 'loss': 0.23012494108488046, 'time_step': 0.005452795084132705, 'init_value': -11.331881523132324, 'ave_value': -13.701438343545489, 'soft_opc': nan} step=7224




2022-04-20 18:30.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.43 [info     ] FQE_20220420183000: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.000172346830368042, 'time_algorithm_update': 0.004854930694713149, 'loss': 0.23675361244267848, 'time_step': 0.0051075601300527886, 'init_value': -11.528665542602539, 'ave_value': -14.06706814252612, 'soft_opc': nan} step=7568




2022-04-20 18:30.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.45 [info     ] FQE_20220420183000: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001690242179604464, 'time_algorithm_update': 0.00513211447139119, 'loss': 0.24354081087594115, 'time_step': 0.005378831264584563, 'init_value': -12.068260192871094, 'ave_value': -14.762200059065059, 'soft_opc': nan} step=7912




2022-04-20 18:30.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.47 [info     ] FQE_20220420183000: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001758004343786905, 'time_algorithm_update': 0.005189923352973406, 'loss': 0.26816248106986806, 'time_step': 0.0054476593816003135, 'init_value': -12.338821411132812, 'ave_value': -15.153757066586735, 'soft_opc': nan} step=8256




2022-04-20 18:30.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.49 [info     ] FQE_20220420183000: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017437477444493495, 'time_algorithm_update': 0.005232427009316378, 'loss': 0.28283347893874483, 'time_step': 0.005492674056873765, 'init_value': -12.792587280273438, 'ave_value': -15.567376762014804, 'soft_opc': nan} step=8600




2022-04-20 18:30.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.51 [info     ] FQE_20220420183000: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017058294872904933, 'time_algorithm_update': 0.005157930906428848, 'loss': 0.3042380323438624, 'time_step': 0.005408418039942897, 'init_value': -13.045848846435547, 'ave_value': -15.800737859388846, 'soft_opc': nan} step=8944




2022-04-20 18:30.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.53 [info     ] FQE_20220420183000: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001774340174918951, 'time_algorithm_update': 0.004775486020154731, 'loss': 0.3059104462262503, 'time_step': 0.005033845818319986, 'init_value': -13.25851058959961, 'ave_value': -15.955160981697777, 'soft_opc': nan} step=9288




2022-04-20 18:30.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.55 [info     ] FQE_20220420183000: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016877540322237237, 'time_algorithm_update': 0.005100242620290712, 'loss': 0.3226000245032442, 'time_step': 0.005345965540686319, 'init_value': -13.489151000976562, 'ave_value': -16.08159162827457, 'soft_opc': nan} step=9632




2022-04-20 18:30.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.57 [info     ] FQE_20220420183000: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001768317333487577, 'time_algorithm_update': 0.005250418602034103, 'loss': 0.3439439275561897, 'time_step': 0.005512247251909833, 'init_value': -14.027299880981445, 'ave_value': -16.657472351577702, 'soft_opc': nan} step=9976




2022-04-20 18:30.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:30.59 [info     ] FQE_20220420183000: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017481002696724825, 'time_algorithm_update': 0.0051398124805716585, 'loss': 0.3609323430438201, 'time_step': 0.005396482556365257, 'init_value': -14.39242172241211, 'ave_value': -17.003798757287935, 'soft_opc': nan} step=10320




2022-04-20 18:30.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.01 [info     ] FQE_20220420183000: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001710195874058923, 'time_algorithm_update': 0.0051687567733054935, 'loss': 0.3882371247637757, 'time_step': 0.005419468463853348, 'init_value': -15.09915542602539, 'ave_value': -17.642290559257507, 'soft_opc': nan} step=10664




2022-04-20 18:31.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.03 [info     ] FQE_20220420183000: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016777182734289834, 'time_algorithm_update': 0.004729679157567578, 'loss': 0.407402697019279, 'time_step': 0.004974209291990413, 'init_value': -15.419129371643066, 'ave_value': -17.866740803280425, 'soft_opc': nan} step=11008




2022-04-20 18:31.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.05 [info     ] FQE_20220420183000: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017001046690829966, 'time_algorithm_update': 0.005140930414199829, 'loss': 0.4327834253307692, 'time_step': 0.005389173363530358, 'init_value': -15.692859649658203, 'ave_value': -18.117567438180853, 'soft_opc': nan} step=11352




2022-04-20 18:31.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.07 [info     ] FQE_20220420183000: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017440804215364679, 'time_algorithm_update': 0.005197814037633496, 'loss': 0.443323926805237, 'time_step': 0.005448738502901654, 'init_value': -16.016281127929688, 'ave_value': -18.461517910102618, 'soft_opc': nan} step=11696




2022-04-20 18:31.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.09 [info     ] FQE_20220420183000: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001737343710522319, 'time_algorithm_update': 0.005190224841583607, 'loss': 0.46179466576553707, 'time_step': 0.00544216327889021, 'init_value': -16.128196716308594, 'ave_value': -18.381718882980866, 'soft_opc': nan} step=12040




2022-04-20 18:31.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.11 [info     ] FQE_20220420183000: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017007769540298816, 'time_algorithm_update': 0.005104660294776739, 'loss': 0.48736235410581497, 'time_step': 0.005354910395866217, 'init_value': -16.654550552368164, 'ave_value': -18.695894644688813, 'soft_opc': nan} step=12384




2022-04-20 18:31.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.13 [info     ] FQE_20220420183000: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017212712487509085, 'time_algorithm_update': 0.005047052405601324, 'loss': 0.4932059820763074, 'time_step': 0.005300131648085838, 'init_value': -16.623077392578125, 'ave_value': -18.507102112740547, 'soft_opc': nan} step=12728




2022-04-20 18:31.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.15 [info     ] FQE_20220420183000: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001718339531920677, 'time_algorithm_update': 0.00513466291649397, 'loss': 0.516778916965217, 'time_step': 0.005385418270909509, 'init_value': -16.438987731933594, 'ave_value': -18.18691351297605, 'soft_opc': nan} step=13072




2022-04-20 18:31.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.17 [info     ] FQE_20220420183000: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017440111138099847, 'time_algorithm_update': 0.005148472480995711, 'loss': 0.5431469795077519, 'time_step': 0.005402291236921798, 'init_value': -16.670604705810547, 'ave_value': -18.272361928752314, 'soft_opc': nan} step=13416




2022-04-20 18:31.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.19 [info     ] FQE_20220420183000: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016688330228938612, 'time_algorithm_update': 0.005038263492806013, 'loss': 0.5697755322380121, 'time_step': 0.005282420751660369, 'init_value': -17.381553649902344, 'ave_value': -18.80959146571312, 'soft_opc': nan} step=13760




2022-04-20 18:31.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.21 [info     ] FQE_20220420183000: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001722553441690844, 'time_algorithm_update': 0.004764349654663441, 'loss': 0.6052038158441699, 'time_step': 0.005016595125198364, 'init_value': -17.877439498901367, 'ave_value': -19.095306173823605, 'soft_opc': nan} step=14104




2022-04-20 18:31.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.22 [info     ] FQE_20220420183000: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001698552176009777, 'time_algorithm_update': 0.0037317061147024463, 'loss': 0.6372971664897578, 'time_step': 0.003982310378274252, 'init_value': -17.786762237548828, 'ave_value': -18.907404258252786, 'soft_opc': nan} step=14448




2022-04-20 18:31.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.24 [info     ] FQE_20220420183000: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016858134158821993, 'time_algorithm_update': 0.00373564972433933, 'loss': 0.6640201221371806, 'time_step': 0.00398040164348691, 'init_value': -18.0661678314209, 'ave_value': -19.07400887954544, 'soft_opc': nan} step=14792




2022-04-20 18:31.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.25 [info     ] FQE_20220420183000: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016789796740509743, 'time_algorithm_update': 0.0037559111450993737, 'loss': 0.71551488047509, 'time_step': 0.00400157376777294, 'init_value': -18.550289154052734, 'ave_value': -19.49775964072318, 'soft_opc': nan} step=15136




2022-04-20 18:31.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.27 [info     ] FQE_20220420183000: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001683990622675696, 'time_algorithm_update': 0.0037813553976458174, 'loss': 0.7570515955733352, 'time_step': 0.004026024840598883, 'init_value': -18.622568130493164, 'ave_value': -19.506288072987047, 'soft_opc': nan} step=15480




2022-04-20 18:31.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.28 [info     ] FQE_20220420183000: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001645497111387031, 'time_algorithm_update': 0.0036530674890030263, 'loss': 0.7850028969737333, 'time_step': 0.003894882839779521, 'init_value': -18.596681594848633, 'ave_value': -19.23662265335201, 'soft_opc': nan} step=15824




2022-04-20 18:31.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.29 [info     ] FQE_20220420183000: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001658838848735011, 'time_algorithm_update': 0.003789521233980046, 'loss': 0.805468013185228, 'time_step': 0.00403362512588501, 'init_value': -18.940465927124023, 'ave_value': -19.457686635519604, 'soft_opc': nan} step=16168




2022-04-20 18:31.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.31 [info     ] FQE_20220420183000: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016507575678270916, 'time_algorithm_update': 0.003733233656994132, 'loss': 0.8413558769213079, 'time_step': 0.0039754135664119275, 'init_value': -19.251731872558594, 'ave_value': -19.647350356979548, 'soft_opc': nan} step=16512




2022-04-20 18:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.32 [info     ] FQE_20220420183000: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001655297223911729, 'time_algorithm_update': 0.0036819923755734467, 'loss': 0.8648402204000672, 'time_step': 0.003922927518223607, 'init_value': -19.257076263427734, 'ave_value': -19.471714385815368, 'soft_opc': nan} step=16856




2022-04-20 18:31.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:31.34 [info     ] FQE_20220420183000: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001666407252466956, 'time_algorithm_update': 0.0037183692288953202, 'loss': 0.8972300221753675, 'time_step': 0.003963417092034983, 'init_value': -19.491153717041016, 'ave_value': -19.490729618805403, 'soft_opc': nan} step=17200




2022-04-20 18:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183000/model_17200.pt
search iteration:  36
using hyper params:  [0.009405397152397018, 0.001588563066816578, 3.547561909502586e-05, 5]
2022-04-20 18:31.34 [debug    ] RoundIterator is selected.
2022-04-20 18:31.34 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420183134
2022-04-20 18:31.34 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:31.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:31.34 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:31.34 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00940539715239

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.37 [info     ] TD3PlusBC_20220420183134: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003854831059773763, 'time_algorithm_update': 0.0071005270495052225, 'critic_loss': 12.222975184345803, 'actor_loss': 2.684385388915302, 'time_step': 0.007566083244412963, 'td_error': 0.9347296374335784, 'init_value': -8.021628379821777, 'ave_value': -4.944172991633516} step=342
2022-04-20 18:31.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.40 [info     ] TD3PlusBC_20220420183134: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036831487689101906, 'time_algorithm_update': 0.007028189318919042, 'critic_loss': 3.3491898712358976, 'actor_loss': 2.5593602964055466, 'time_step': 0.00747554762321606, 'td_error': 1.0467630356347863, 'init_value': -11.247973442077637, 'ave_value': -6.9912101404482145} step=684
2022-04-20 18:31.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.43 [info     ] TD3PlusBC_20220420183134: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00037719492326702985, 'time_algorithm_update': 0.0071105357499150505, 'critic_loss': 4.765219600228538, 'actor_loss': 2.5451772324523034, 'time_step': 0.007565182552003024, 'td_error': 1.2639658493394392, 'init_value': -14.919076919555664, 'ave_value': -9.288685553057313} step=1026
2022-04-20 18:31.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.46 [info     ] TD3PlusBC_20220420183134: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003766093337744997, 'time_algorithm_update': 0.007134533765023215, 'critic_loss': 6.597449094928496, 'actor_loss': 2.5359282870041695, 'time_step': 0.007591148566084298, 'td_error': 1.5503263775792617, 'init_value': -18.646564483642578, 'ave_value': -11.660489297546237} step=1368
2022-04-20 18:31.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.49 [info     ] TD3PlusBC_20220420183134: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00037466085445114045, 'time_algorithm_update': 0.007075897434301544, 'critic_loss': 8.627129547777232, 'actor_loss': 2.53187020062006, 'time_step': 0.007527777326037312, 'td_error': 1.9319407037094634, 'init_value': -22.427555084228516, 'ave_value': -14.01483110002628} step=1710
2022-04-20 18:31.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.52 [info     ] TD3PlusBC_20220420183134: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003714986711914776, 'time_algorithm_update': 0.006965287247596428, 'critic_loss': 10.830842231449328, 'actor_loss': 2.5276816658109253, 'time_step': 0.007407505609835797, 'td_error': 2.347266479900535, 'init_value': -25.92569351196289, 'ave_value': -16.262235411501464} step=2052
2022-04-20 18:31.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.55 [info     ] TD3PlusBC_20220420183134: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003707150966800444, 'time_algorithm_update': 0.007045634308753655, 'critic_loss': 13.291863602504396, 'actor_loss': 2.5274943050585295, 'time_step': 0.007489845069528323, 'td_error': 2.814588692299128, 'init_value': -29.390268325805664, 'ave_value': -18.429452510458912} step=2394
2022-04-20 18:31.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.58 [info     ] TD3PlusBC_20220420183134: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003808862284610146, 'time_algorithm_update': 0.007070395681593154, 'critic_loss': 15.750824033168325, 'actor_loss': 2.525079714624505, 'time_step': 0.007522051794487133, 'td_error': 3.2836224162413754, 'init_value': -32.67388153076172, 'ave_value': -20.6165577486144} step=2736
2022-04-20 18:31.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.01 [info     ] TD3PlusBC_20220420183134: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003764385368391784, 'time_algorithm_update': 0.006464537821317974, 'critic_loss': 18.40078478110464, 'actor_loss': 2.5239751910605626, 'time_step': 0.00691925921635321, 'td_error': 3.783032919494297, 'init_value': -35.88646697998047, 'ave_value': -22.646556197613904} step=3078
2022-04-20 18:32.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.04 [info     ] TD3PlusBC_20220420183134: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003689562368114092, 'time_algorithm_update': 0.00690375364314743, 'critic_loss': 21.344915444390814, 'actor_loss': 2.524082765244601, 'time_step': 0.007349143948471337, 'td_error': 4.288263481557546, 'init_value': -38.784393310546875, 'ave_value': -24.511948844051105} step=3420
2022-04-20 18:32.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.07 [info     ] TD3PlusBC_20220420183134: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003731292590760348, 'time_algorithm_update': 0.006973862648010254, 'critic_loss': 24.544729943861043, 'actor_loss': 2.5226732039312174, 'time_step': 0.007424350370440567, 'td_error': 4.802391649253687, 'init_value': -42.05820083618164, 'ave_value': -26.619666952343096} step=3762
2022-04-20 18:32.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.10 [info     ] TD3PlusBC_20220420183134: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003804505219933582, 'time_algorithm_update': 0.007063899821008158, 'critic_loss': 27.866954382400067, 'actor_loss': 2.523298680433753, 'time_step': 0.007519299523872242, 'td_error': 5.29021662691993, 'init_value': -44.82500457763672, 'ave_value': -28.440869435097117} step=4104
2022-04-20 18:32.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.13 [info     ] TD3PlusBC_20220420183134: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00037277790538051673, 'time_algorithm_update': 0.006995331474214966, 'critic_loss': 31.518141568055626, 'actor_loss': 2.524815270775243, 'time_step': 0.007442977693345811, 'td_error': 5.6819802875033085, 'init_value': -46.90557098388672, 'ave_value': -29.91324219016803} step=4446
2022-04-20 18:32.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.15 [info     ] TD3PlusBC_20220420183134: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00037172523855465893, 'time_algorithm_update': 0.006955726104870177, 'critic_loss': 35.40294199937966, 'actor_loss': 2.5230016750201845, 'time_step': 0.007402414466902527, 'td_error': 6.231627745101877, 'init_value': -50.10817337036133, 'ave_value': -31.810234937203862} step=4788
2022-04-20 18:32.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.18 [info     ] TD3PlusBC_20220420183134: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000379644639310781, 'time_algorithm_update': 0.006985350659019069, 'critic_loss': 39.219929209926676, 'actor_loss': 2.5223389898824413, 'time_step': 0.007444342674567685, 'td_error': 6.554201446192467, 'init_value': -52.27814865112305, 'ave_value': -33.37792130668421} step=5130
2022-04-20 18:32.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.21 [info     ] TD3PlusBC_20220420183134: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003800698888232136, 'time_algorithm_update': 0.006893019927175422, 'critic_loss': 43.48264718195151, 'actor_loss': 2.5243537216855767, 'time_step': 0.0073519401382981684, 'td_error': 7.021853462830593, 'init_value': -54.84412384033203, 'ave_value': -34.953110881869485} step=5472
2022-04-20 18:32.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.24 [info     ] TD3PlusBC_20220420183134: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00037003051467806273, 'time_algorithm_update': 0.0069124405844169755, 'critic_loss': 48.0243439088788, 'actor_loss': 2.5222447782929183, 'time_step': 0.0073579619502463535, 'td_error': 7.379666986978017, 'init_value': -57.00421142578125, 'ave_value': -36.4435764052958} step=5814
2022-04-20 18:32.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.27 [info     ] TD3PlusBC_20220420183134: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003745290968153212, 'time_algorithm_update': 0.006987292864169293, 'critic_loss': 52.73619887703344, 'actor_loss': 2.5235038068559437, 'time_step': 0.007436043337771767, 'td_error': 7.75329539448783, 'init_value': -59.29335403442383, 'ave_value': -38.00529656536592} step=6156
2022-04-20 18:32.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.30 [info     ] TD3PlusBC_20220420183134: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00037461205532676294, 'time_algorithm_update': 0.0069176962501124335, 'critic_loss': 57.58281364775541, 'actor_loss': 2.5236702244184173, 'time_step': 0.007369326569183528, 'td_error': 7.965662319864096, 'init_value': -60.386329650878906, 'ave_value': -38.83469451549949} step=6498
2022-04-20 18:32.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.33 [info     ] TD3PlusBC_20220420183134: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00037657238586604245, 'time_algorithm_update': 0.006904507938184236, 'critic_loss': 62.487287381936234, 'actor_loss': 2.523467181021707, 'time_step': 0.007356297202974732, 'td_error': 8.429222347787803, 'init_value': -62.3382682800293, 'ave_value': -40.38247834507368} step=6840
2022-04-20 18:32.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.36 [info     ] TD3PlusBC_20220420183134: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003743387802302489, 'time_algorithm_update': 0.007001341434947231, 'critic_loss': 67.42445029989321, 'actor_loss': 2.522225438502797, 'time_step': 0.007452902040983501, 'td_error': 8.706884802846663, 'init_value': -64.30522155761719, 'ave_value': -41.451148659802705} step=7182
2022-04-20 18:32.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.39 [info     ] TD3PlusBC_20220420183134: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037056521365517065, 'time_algorithm_update': 0.006907320859139426, 'critic_loss': 72.64890864857456, 'actor_loss': 2.5228264345760234, 'time_step': 0.007354040592037446, 'td_error': 9.123739331631857, 'init_value': -66.7109603881836, 'ave_value': -42.761196172574856} step=7524
2022-04-20 18:32.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.42 [info     ] TD3PlusBC_20220420183134: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00037560128329093, 'time_algorithm_update': 0.006858127158984803, 'critic_loss': 77.55493487530981, 'actor_loss': 2.523445682916028, 'time_step': 0.0073095838925991835, 'td_error': 9.247789880768057, 'init_value': -67.88858032226562, 'ave_value': -43.9315233321133} step=7866
2022-04-20 18:32.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.45 [info     ] TD3PlusBC_20220420183134: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003775413970501102, 'time_algorithm_update': 0.0070506320362202606, 'critic_loss': 82.38910676164237, 'actor_loss': 2.5230936850720678, 'time_step': 0.007508574173464413, 'td_error': 9.569717163047057, 'init_value': -69.58074951171875, 'ave_value': -44.91950872663215} step=8208
2022-04-20 18:32.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.48 [info     ] TD3PlusBC_20220420183134: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003760753319277401, 'time_algorithm_update': 0.006929772639135171, 'critic_loss': 87.07809061195418, 'actor_loss': 2.5243532824934576, 'time_step': 0.00738448497147588, 'td_error': 9.935878839798846, 'init_value': -71.11612701416016, 'ave_value': -45.98502138339335} step=8550
2022-04-20 18:32.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.51 [info     ] TD3PlusBC_20220420183134: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003682291298581843, 'time_algorithm_update': 0.006796753894515902, 'critic_loss': 91.98491060803508, 'actor_loss': 2.523841887189631, 'time_step': 0.007242353338944285, 'td_error': 10.098831203460074, 'init_value': -72.14421081542969, 'ave_value': -46.94963962265273} step=8892
2022-04-20 18:32.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.54 [info     ] TD3PlusBC_20220420183134: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003745751074183057, 'time_algorithm_update': 0.00685313222003959, 'critic_loss': 96.18401159319961, 'actor_loss': 2.5234996087369863, 'time_step': 0.007304077259978356, 'td_error': 10.40053366431314, 'init_value': -73.55725860595703, 'ave_value': -47.98617806753085} step=9234
2022-04-20 18:32.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.57 [info     ] TD3PlusBC_20220420183134: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036930410485518604, 'time_algorithm_update': 0.00690668577339217, 'critic_loss': 100.27869080660636, 'actor_loss': 2.5229948863648533, 'time_step': 0.007344513608698259, 'td_error': 10.677629965335402, 'init_value': -75.17179107666016, 'ave_value': -48.937304759117616} step=9576
2022-04-20 18:32.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:32.59 [info     ] TD3PlusBC_20220420183134: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003712637382641173, 'time_algorithm_update': 0.006901689440186261, 'critic_loss': 104.58852156700446, 'actor_loss': 2.524488852038021, 'time_step': 0.007332760688157109, 'td_error': 10.967065966645379, 'init_value': -75.7802734375, 'ave_value': -49.68298808713905} step=9918
2022-04-20 18:32.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.02 [info     ] TD3PlusBC_20220420183134: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003694783874422486, 'time_algorithm_update': 0.006887636686626233, 'critic_loss': 107.9939248603687, 'actor_loss': 2.5237044744324266, 'time_step': 0.007322654389498527, 'td_error': 10.929913549267189, 'init_value': -76.2405014038086, 'ave_value': -50.40576773134807} step=10260
2022-04-20 18:33.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.05 [info     ] TD3PlusBC_20220420183134: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003715586244014272, 'time_algorithm_update': 0.006829866191797089, 'critic_loss': 111.49412480850665, 'actor_loss': 2.5251445909689743, 'time_step': 0.007272015538131981, 'td_error': 11.189236408056322, 'init_value': -78.12010192871094, 'ave_value': -51.38701082181404} step=10602
2022-04-20 18:33.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.08 [info     ] TD3PlusBC_20220420183134: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003732442855834961, 'time_algorithm_update': 0.006816479197719641, 'critic_loss': 114.86076644986693, 'actor_loss': 2.5236117365764597, 'time_step': 0.00725274546104565, 'td_error': 11.270380469984053, 'init_value': -78.36415100097656, 'ave_value': -51.97370023739029} step=10944
2022-04-20 18:33.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.11 [info     ] TD3PlusBC_20220420183134: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003733349125287686, 'time_algorithm_update': 0.006892339527955529, 'critic_loss': 117.88359745761804, 'actor_loss': 2.5250892555504514, 'time_step': 0.007329117484957154, 'td_error': 11.699448366183054, 'init_value': -80.15933990478516, 'ave_value': -52.96830044077063} step=11286
2022-04-20 18:33.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.14 [info     ] TD3PlusBC_20220420183134: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00037404249983224254, 'time_algorithm_update': 0.0068332556395502815, 'critic_loss': 121.09786904764454, 'actor_loss': 2.524589850888615, 'time_step': 0.007270553655791701, 'td_error': 11.943819690171617, 'init_value': -81.61790466308594, 'ave_value': -53.84858872899945} step=11628
2022-04-20 18:33.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.17 [info     ] TD3PlusBC_20220420183134: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003776794288590638, 'time_algorithm_update': 0.006798518331427323, 'critic_loss': 123.89833416855126, 'actor_loss': 2.5259120408554523, 'time_step': 0.007239730734574168, 'td_error': 12.067155583226493, 'init_value': -82.20409393310547, 'ave_value': -54.63646098169985} step=11970
2022-04-20 18:33.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.20 [info     ] TD3PlusBC_20220420183134: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00037868399369089226, 'time_algorithm_update': 0.006855505251745034, 'critic_loss': 126.4787295380531, 'actor_loss': 2.5266119694849203, 'time_step': 0.007301216934159485, 'td_error': 12.38491644552696, 'init_value': -82.86878967285156, 'ave_value': -55.14786681158183} step=12312
2022-04-20 18:33.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.23 [info     ] TD3PlusBC_20220420183134: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00037297728466011626, 'time_algorithm_update': 0.0067881136609796895, 'critic_loss': 128.90361712271707, 'actor_loss': 2.526714829673544, 'time_step': 0.007224530504460921, 'td_error': 12.382860515689735, 'init_value': -83.5291519165039, 'ave_value': -55.7898506150398} step=12654
2022-04-20 18:33.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.26 [info     ] TD3PlusBC_20220420183134: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00038219683351572493, 'time_algorithm_update': 0.007882755402235956, 'critic_loss': 131.44079719231144, 'actor_loss': 2.528121262265925, 'time_step': 0.008331938793784693, 'td_error': 12.036887678309336, 'init_value': -82.95536804199219, 'ave_value': -56.247036551281575} step=12996
2022-04-20 18:33.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.30 [info     ] TD3PlusBC_20220420183134: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003774688954938922, 'time_algorithm_update': 0.009011923221119662, 'critic_loss': 133.86068203574732, 'actor_loss': 2.527850842615317, 'time_step': 0.009456064965989854, 'td_error': 12.592739399416438, 'init_value': -84.32003021240234, 'ave_value': -57.001184053602415} step=13338
2022-04-20 18:33.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.33 [info     ] TD3PlusBC_20220420183134: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037494458650287827, 'time_algorithm_update': 0.008848186821965446, 'critic_loss': 135.92406425699156, 'actor_loss': 2.52777487910979, 'time_step': 0.00928211630436412, 'td_error': 12.768431028987957, 'init_value': -84.24160766601562, 'ave_value': -57.01741485813968} step=13680
2022-04-20 18:33.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.37 [info     ] TD3PlusBC_20220420183134: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037241260907803364, 'time_algorithm_update': 0.00881884739412899, 'critic_loss': 138.5664043761136, 'actor_loss': 2.527088004943223, 'time_step': 0.009256353155214187, 'td_error': 12.777412715616704, 'init_value': -85.384033203125, 'ave_value': -57.95040141900867} step=14022
2022-04-20 18:33.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.41 [info     ] TD3PlusBC_20220420183134: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003808681030719601, 'time_algorithm_update': 0.008913179587202462, 'critic_loss': 140.79939096015795, 'actor_loss': 2.5272000296074046, 'time_step': 0.009363614327726309, 'td_error': 13.327495021447463, 'init_value': -87.23756408691406, 'ave_value': -58.88724917786196} step=14364
2022-04-20 18:33.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.44 [info     ] TD3PlusBC_20220420183134: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00038090086819832783, 'time_algorithm_update': 0.008591209238732767, 'critic_loss': 142.66552056206598, 'actor_loss': 2.529607293201469, 'time_step': 0.009036937652275576, 'td_error': 12.991583080408098, 'init_value': -85.92787170410156, 'ave_value': -58.8997979540737} step=14706
2022-04-20 18:33.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.48 [info     ] TD3PlusBC_20220420183134: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003733997456511559, 'time_algorithm_update': 0.00889901738417776, 'critic_loss': 144.5915531136139, 'actor_loss': 2.52912758247197, 'time_step': 0.009334310453537612, 'td_error': 13.012307824391076, 'init_value': -86.10372161865234, 'ave_value': -59.269335413718814} step=15048
2022-04-20 18:33.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.52 [info     ] TD3PlusBC_20220420183134: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003750142995377033, 'time_algorithm_update': 0.00894825151789258, 'critic_loss': 146.51741409301758, 'actor_loss': 2.530025503091645, 'time_step': 0.009389065162480226, 'td_error': 13.296951384739158, 'init_value': -86.47159576416016, 'ave_value': -59.77610974072336} step=15390
2022-04-20 18:33.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.56 [info     ] TD3PlusBC_20220420183134: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.000378574544226217, 'time_algorithm_update': 0.008788922376800002, 'critic_loss': 148.11514197455512, 'actor_loss': 2.529977017675924, 'time_step': 0.009228425416332937, 'td_error': 13.331967232932225, 'init_value': -86.4505615234375, 'ave_value': -60.087842700088416} step=15732
2022-04-20 18:33.56 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.59 [info     ] TD3PlusBC_20220420183134: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00037770034276951125, 'time_algorithm_update': 0.008805450640226664, 'critic_loss': 149.83311930873938, 'actor_loss': 2.5310418870713978, 'time_step': 0.009249326081303825, 'td_error': 13.552592454203667, 'init_value': -86.69287872314453, 'ave_value': -60.58686393680001} step=16074
2022-04-20 18:33.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.03 [info     ] TD3PlusBC_20220420183134: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003800210896988361, 'time_algorithm_update': 0.008512347762347662, 'critic_loss': 151.2782883002744, 'actor_loss': 2.5312760350299857, 'time_step': 0.008959533178318314, 'td_error': 13.607756966032499, 'init_value': -86.4213638305664, 'ave_value': -60.99170139388123} step=16416
2022-04-20 18:34.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.07 [info     ] TD3PlusBC_20220420183134: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003811086130421064, 'time_algorithm_update': 0.008970519255476388, 'critic_loss': 152.57495833279793, 'actor_loss': 2.5318826680992084, 'time_step': 0.009421499151932565, 'td_error': 13.935846968447326, 'init_value': -87.38987731933594, 'ave_value': -61.423306386265736} step=16758
2022-04-20 18:34.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.10 [info     ] TD3PlusBC_20220420183134: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037662188212076825, 'time_algorithm_update': 0.008820049246849372, 'critic_loss': 154.37362851594625, 'actor_loss': 2.5323789454343024, 'time_step': 0.009259889697470861, 'td_error': 14.078768340722464, 'init_value': -87.94722747802734, 'ave_value': -61.85288743932312} step=17100
2022-04-20 18:34.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183134/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:34.11 [info     ] FQE_20220420183410: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016120542962867092, 'time_algorithm_update': 0.004563578640122011, 'loss': 0.006998085141944957, 'time_step': 0.004798110709132919, 'init_value': -0.03759775310754776, 'ave_value': 0.03873462324525725, 'soft_opc': nan} step=166




2022-04-20 18:34.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.12 [info     ] FQE_20220420183410: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001573634434895343, 'time_algorithm_update': 0.004992476428847715, 'loss': 0.005350050303232239, 'time_step': 0.005221928458615958, 'init_value': -0.18762391805648804, 'ave_value': -0.06217260781574894, 'soft_opc': nan} step=332




2022-04-20 18:34.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.13 [info     ] FQE_20220420183410: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016257992709975644, 'time_algorithm_update': 0.005106723452188882, 'loss': 0.0047972111805645095, 'time_step': 0.0053393582263624815, 'init_value': -0.21722349524497986, 'ave_value': -0.07611644035876468, 'soft_opc': nan} step=498




2022-04-20 18:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.14 [info     ] FQE_20220420183410: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016037383711481668, 'time_algorithm_update': 0.0049724205430731715, 'loss': 0.004465281189116369, 'time_step': 0.005204220852219915, 'init_value': -0.3088071942329407, 'ave_value': -0.13806063170872024, 'soft_opc': nan} step=664




2022-04-20 18:34.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.15 [info     ] FQE_20220420183410: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016174546207290097, 'time_algorithm_update': 0.004984192101352186, 'loss': 0.004067157695339207, 'time_step': 0.0052180376397558004, 'init_value': -0.36414384841918945, 'ave_value': -0.17072673427981433, 'soft_opc': nan} step=830




2022-04-20 18:34.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.16 [info     ] FQE_20220420183410: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016005498817168087, 'time_algorithm_update': 0.004993509097271655, 'loss': 0.0036859520429083294, 'time_step': 0.005227551402815853, 'init_value': -0.3980199992656708, 'ave_value': -0.18716123063402534, 'soft_opc': nan} step=996




2022-04-20 18:34.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.17 [info     ] FQE_20220420183410: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015983954969658908, 'time_algorithm_update': 0.0050122924597866565, 'loss': 0.0035012770459583275, 'time_step': 0.005239854376000094, 'init_value': -0.44297826290130615, 'ave_value': -0.19045600384591266, 'soft_opc': nan} step=1162




2022-04-20 18:34.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.18 [info     ] FQE_20220420183410: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016411241278590927, 'time_algorithm_update': 0.00508918245154691, 'loss': 0.0033528644672920368, 'time_step': 0.0053288721176515145, 'init_value': -0.5319721698760986, 'ave_value': -0.23620628051896084, 'soft_opc': nan} step=1328




2022-04-20 18:34.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.19 [info     ] FQE_20220420183410: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016112930803413852, 'time_algorithm_update': 0.005063548145523991, 'loss': 0.0031451573690079735, 'time_step': 0.0052995236523180125, 'init_value': -0.5704636573791504, 'ave_value': -0.24472230967922926, 'soft_opc': nan} step=1494




2022-04-20 18:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.20 [info     ] FQE_20220420183410: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016433934131300593, 'time_algorithm_update': 0.004096569785152574, 'loss': 0.0033912094939690845, 'time_step': 0.004335189440164222, 'init_value': -0.6719273328781128, 'ave_value': -0.2922366959588216, 'soft_opc': nan} step=1660




2022-04-20 18:34.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.21 [info     ] FQE_20220420183410: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016451025583657874, 'time_algorithm_update': 0.005177291042833443, 'loss': 0.0033933468996154435, 'time_step': 0.005412136215761483, 'init_value': -0.7617602944374084, 'ave_value': -0.3486909627176083, 'soft_opc': nan} step=1826




2022-04-20 18:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.22 [info     ] FQE_20220420183410: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016582012176513672, 'time_algorithm_update': 0.005022622016539057, 'loss': 0.0034342270726001405, 'time_step': 0.005261796066559941, 'init_value': -0.7848906517028809, 'ave_value': -0.33053936450448523, 'soft_opc': nan} step=1992




2022-04-20 18:34.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.23 [info     ] FQE_20220420183410: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016768438270293087, 'time_algorithm_update': 0.005070468029343939, 'loss': 0.0039800841807309225, 'time_step': 0.005313850310911615, 'init_value': -0.911615252494812, 'ave_value': -0.41580011772226777, 'soft_opc': nan} step=2158




2022-04-20 18:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.24 [info     ] FQE_20220420183410: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.000166947583118117, 'time_algorithm_update': 0.005089939358722733, 'loss': 0.004041665313619536, 'time_step': 0.005335401339703296, 'init_value': -0.9931871891021729, 'ave_value': -0.4544886392725816, 'soft_opc': nan} step=2324




2022-04-20 18:34.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.25 [info     ] FQE_20220420183410: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016400038477886155, 'time_algorithm_update': 0.0050759143139942585, 'loss': 0.004374519375528781, 'time_step': 0.005314739353685494, 'init_value': -1.0602110624313354, 'ave_value': -0.48808959886633063, 'soft_opc': nan} step=2490




2022-04-20 18:34.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.26 [info     ] FQE_20220420183410: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016733824488628343, 'time_algorithm_update': 0.005099005009754595, 'loss': 0.004939747268420992, 'time_step': 0.005342334149831749, 'init_value': -1.2084197998046875, 'ave_value': -0.5950962916276678, 'soft_opc': nan} step=2656




2022-04-20 18:34.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.27 [info     ] FQE_20220420183410: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016433934131300593, 'time_algorithm_update': 0.005077451108449913, 'loss': 0.005486229332220034, 'time_step': 0.005315542221069336, 'init_value': -1.3122217655181885, 'ave_value': -0.6636106915075567, 'soft_opc': nan} step=2822




2022-04-20 18:34.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.28 [info     ] FQE_20220420183410: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016251816807023013, 'time_algorithm_update': 0.004991555788430823, 'loss': 0.006214907019068094, 'time_step': 0.005228499332106257, 'init_value': -1.356536626815796, 'ave_value': -0.6708204109280429, 'soft_opc': nan} step=2988




2022-04-20 18:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.28 [info     ] FQE_20220420183410: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015831424529293934, 'time_algorithm_update': 0.0040304387908384025, 'loss': 0.006812902261413275, 'time_step': 0.00426023121339729, 'init_value': -1.4699522256851196, 'ave_value': -0.7619534014212387, 'soft_opc': nan} step=3154




2022-04-20 18:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.29 [info     ] FQE_20220420183410: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016763267746890885, 'time_algorithm_update': 0.004903586514024849, 'loss': 0.007266715117056955, 'time_step': 0.0051520703786827, 'init_value': -1.540313482284546, 'ave_value': -0.8008290129789227, 'soft_opc': nan} step=3320




2022-04-20 18:34.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.30 [info     ] FQE_20220420183410: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016559319323804006, 'time_algorithm_update': 0.005064065197864211, 'loss': 0.008332414428439799, 'time_step': 0.005309771342449878, 'init_value': -1.6439175605773926, 'ave_value': -0.8657309095266166, 'soft_opc': nan} step=3486




2022-04-20 18:34.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.31 [info     ] FQE_20220420183410: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016382947025528872, 'time_algorithm_update': 0.005018956689949495, 'loss': 0.009134638999134347, 'time_step': 0.00525707509144243, 'init_value': -1.7395408153533936, 'ave_value': -0.9426670676642643, 'soft_opc': nan} step=3652




2022-04-20 18:34.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.32 [info     ] FQE_20220420183410: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001677260341414486, 'time_algorithm_update': 0.004980289792440024, 'loss': 0.009730709756651601, 'time_step': 0.005223720906728722, 'init_value': -1.8340439796447754, 'ave_value': -0.9981022342754242, 'soft_opc': nan} step=3818




2022-04-20 18:34.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.33 [info     ] FQE_20220420183410: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016852028398628695, 'time_algorithm_update': 0.005040875400405332, 'loss': 0.01038979478068201, 'time_step': 0.005285923739513719, 'init_value': -1.933441400527954, 'ave_value': -1.0612356218836598, 'soft_opc': nan} step=3984




2022-04-20 18:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.34 [info     ] FQE_20220420183410: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001634258821786168, 'time_algorithm_update': 0.004931807518005371, 'loss': 0.011511847074564367, 'time_step': 0.005171882100852139, 'init_value': -1.9868385791778564, 'ave_value': -1.0919361613472705, 'soft_opc': nan} step=4150




2022-04-20 18:34.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.35 [info     ] FQE_20220420183410: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016832495310220374, 'time_algorithm_update': 0.005035203623484416, 'loss': 0.012420468572358859, 'time_step': 0.005278206733335932, 'init_value': -2.0031824111938477, 'ave_value': -1.0675095197115396, 'soft_opc': nan} step=4316




2022-04-20 18:34.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.36 [info     ] FQE_20220420183410: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016633717410535697, 'time_algorithm_update': 0.00511634206197348, 'loss': 0.012934336395294642, 'time_step': 0.005356544471648802, 'init_value': -2.116032123565674, 'ave_value': -1.1655448336741543, 'soft_opc': nan} step=4482




2022-04-20 18:34.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.37 [info     ] FQE_20220420183410: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016187759767095726, 'time_algorithm_update': 0.004694161644901137, 'loss': 0.013920327888364085, 'time_step': 0.004931764430310352, 'init_value': -2.2439467906951904, 'ave_value': -1.2565673694912312, 'soft_opc': nan} step=4648




2022-04-20 18:34.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.38 [info     ] FQE_20220420183410: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016714435025870082, 'time_algorithm_update': 0.004606890391154462, 'loss': 0.014227654351516211, 'time_step': 0.004851553813520685, 'init_value': -2.271453619003296, 'ave_value': -1.2734376559529017, 'soft_opc': nan} step=4814




2022-04-20 18:34.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.39 [info     ] FQE_20220420183410: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016657846519745975, 'time_algorithm_update': 0.00507702166775623, 'loss': 0.015470342085057175, 'time_step': 0.005319392824747476, 'init_value': -2.3306572437286377, 'ave_value': -1.3108022691530956, 'soft_opc': nan} step=4980




2022-04-20 18:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.40 [info     ] FQE_20220420183410: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016945959573768708, 'time_algorithm_update': 0.005048802100032209, 'loss': 0.016049746611907362, 'time_step': 0.0052949621016720695, 'init_value': -2.4089436531066895, 'ave_value': -1.355871611021392, 'soft_opc': nan} step=5146




2022-04-20 18:34.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.41 [info     ] FQE_20220420183410: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016357381659817984, 'time_algorithm_update': 0.005108186997563006, 'loss': 0.01653590456004737, 'time_step': 0.005347556378467974, 'init_value': -2.558330535888672, 'ave_value': -1.491332145638529, 'soft_opc': nan} step=5312




2022-04-20 18:34.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.42 [info     ] FQE_20220420183410: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016771598034594432, 'time_algorithm_update': 0.005003331655479339, 'loss': 0.017970355325771206, 'time_step': 0.005245297788137413, 'init_value': -2.529493808746338, 'ave_value': -1.4708117489610706, 'soft_opc': nan} step=5478




2022-04-20 18:34.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.43 [info     ] FQE_20220420183410: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016304814671895592, 'time_algorithm_update': 0.004971264356590179, 'loss': 0.01836175100698349, 'time_step': 0.005208871450768896, 'init_value': -2.660757064819336, 'ave_value': -1.5853086998199557, 'soft_opc': nan} step=5644




2022-04-20 18:34.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.44 [info     ] FQE_20220420183410: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016444993306355304, 'time_algorithm_update': 0.004980196435767484, 'loss': 0.01930917651428987, 'time_step': 0.005222320556640625, 'init_value': -2.825895309448242, 'ave_value': -1.740307414939476, 'soft_opc': nan} step=5810




2022-04-20 18:34.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.45 [info     ] FQE_20220420183410: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001654653664094856, 'time_algorithm_update': 0.004976094487201737, 'loss': 0.02011361640614334, 'time_step': 0.00521446710609528, 'init_value': -2.7942514419555664, 'ave_value': -1.7052341740763, 'soft_opc': nan} step=5976




2022-04-20 18:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.45 [info     ] FQE_20220420183410: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016321044370352504, 'time_algorithm_update': 0.005130068365349827, 'loss': 0.021063367014257126, 'time_step': 0.005370423018214214, 'init_value': -2.82647442817688, 'ave_value': -1.697122097731912, 'soft_opc': nan} step=6142




2022-04-20 18:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.46 [info     ] FQE_20220420183410: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016350056751664863, 'time_algorithm_update': 0.004101769033684788, 'loss': 0.022198119406246143, 'time_step': 0.004338264465332031, 'init_value': -2.9961140155792236, 'ave_value': -1.8699098983955746, 'soft_opc': nan} step=6308




2022-04-20 18:34.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.47 [info     ] FQE_20220420183410: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001717346260346562, 'time_algorithm_update': 0.005040079714303993, 'loss': 0.022865446239530324, 'time_step': 0.005289227129465126, 'init_value': -3.0369277000427246, 'ave_value': -1.9080016464456446, 'soft_opc': nan} step=6474




2022-04-20 18:34.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.48 [info     ] FQE_20220420183410: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016696769070912557, 'time_algorithm_update': 0.0049541513603853895, 'loss': 0.02452692962938875, 'time_step': 0.005197069731103368, 'init_value': -3.100449562072754, 'ave_value': -1.9339888396060414, 'soft_opc': nan} step=6640




2022-04-20 18:34.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.49 [info     ] FQE_20220420183410: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001670323222516531, 'time_algorithm_update': 0.005018633532236858, 'loss': 0.025776052470908063, 'time_step': 0.005261139697339161, 'init_value': -3.2369823455810547, 'ave_value': -2.0534956098391532, 'soft_opc': nan} step=6806




2022-04-20 18:34.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.50 [info     ] FQE_20220420183410: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001649296427347574, 'time_algorithm_update': 0.0051059349473700465, 'loss': 0.02576175588463624, 'time_step': 0.005344230008412556, 'init_value': -3.2663004398345947, 'ave_value': -2.064671438160512, 'soft_opc': nan} step=6972




2022-04-20 18:34.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.51 [info     ] FQE_20220420183410: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016595800238919546, 'time_algorithm_update': 0.005112884992576507, 'loss': 0.026821846887188774, 'time_step': 0.00535229027989399, 'init_value': -3.445679187774658, 'ave_value': -2.25161866149068, 'soft_opc': nan} step=7138




2022-04-20 18:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.52 [info     ] FQE_20220420183410: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016596949244120033, 'time_algorithm_update': 0.0050076519150331796, 'loss': 0.028132820914987564, 'time_step': 0.005251785358750677, 'init_value': -3.429879903793335, 'ave_value': -2.2126136752550263, 'soft_opc': nan} step=7304




2022-04-20 18:34.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.53 [info     ] FQE_20220420183410: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00017012027372796852, 'time_algorithm_update': 0.005048779119928199, 'loss': 0.028934781008304645, 'time_step': 0.005296237497444612, 'init_value': -3.4127655029296875, 'ave_value': -2.1982421823131206, 'soft_opc': nan} step=7470




2022-04-20 18:34.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.54 [info     ] FQE_20220420183410: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016475729195468397, 'time_algorithm_update': 0.004964015570031591, 'loss': 0.028702114264099264, 'time_step': 0.005203214036412986, 'init_value': -3.555715560913086, 'ave_value': -2.3088804499925795, 'soft_opc': nan} step=7636




2022-04-20 18:34.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.55 [info     ] FQE_20220420183410: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001653547746589385, 'time_algorithm_update': 0.004098561872918922, 'loss': 0.029389894282703388, 'time_step': 0.004337000559611493, 'init_value': -3.5374298095703125, 'ave_value': -2.295728974494043, 'soft_opc': nan} step=7802




2022-04-20 18:34.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.56 [info     ] FQE_20220420183410: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001680276480065771, 'time_algorithm_update': 0.005057510123195419, 'loss': 0.030517618703453923, 'time_step': 0.005299625626529555, 'init_value': -3.5928194522857666, 'ave_value': -2.328755903514186, 'soft_opc': nan} step=7968




2022-04-20 18:34.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.57 [info     ] FQE_20220420183410: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016554436051701926, 'time_algorithm_update': 0.005071604108235922, 'loss': 0.03180364920148985, 'time_step': 0.005311618368309665, 'init_value': -3.74112606048584, 'ave_value': -2.473770406622406, 'soft_opc': nan} step=8134




2022-04-20 18:34.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:34.58 [info     ] FQE_20220420183410: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016726930457425406, 'time_algorithm_update': 0.005113166498850627, 'loss': 0.032440530824200085, 'time_step': 0.005357443568218185, 'init_value': -3.725008726119995, 'ave_value': -2.434459123790667, 'soft_opc': nan} step=8300




2022-04-20 18:34.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183410/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:34.58 [info     ] Directory is created at d3rlpy_logs/FQE_20220420183458
2022-04-20 18:34.58 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:34.58 [debug    ] Building models...
2022-04-20 18:34.58 [debug    ] Models have been built.
2022-04-20 18:34.58 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420183458/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:35.00 [info     ] FQE_20220420183458: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016535229461137638, 'time_algorithm_update': 0.005037000012952228, 'loss': 0.03000375723498765, 'time_step': 0.00528121549029683, 'init_value': -1.0960952043533325, 'ave_value': -1.121456617170626, 'soft_opc': nan} step=344




2022-04-20 18:35.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.02 [info     ] FQE_20220420183458: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017160246538561443, 'time_algorithm_update': 0.005069503950518231, 'loss': 0.022956050082304797, 'time_step': 0.005316733620887579, 'init_value': -1.597901701927185, 'ave_value': -1.6937994346492462, 'soft_opc': nan} step=688




2022-04-20 18:35.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.04 [info     ] FQE_20220420183458: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016973392907963244, 'time_algorithm_update': 0.004593170659486638, 'loss': 0.027389033489080884, 'time_step': 0.004840464786041615, 'init_value': -2.098623752593994, 'ave_value': -2.3194312113765125, 'soft_opc': nan} step=1032




2022-04-20 18:35.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.06 [info     ] FQE_20220420183458: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017183048780574354, 'time_algorithm_update': 0.00504943520523781, 'loss': 0.03194354566323116, 'time_step': 0.00529940876849862, 'init_value': -2.439793825149536, 'ave_value': -2.7954514202099663, 'soft_opc': nan} step=1376




2022-04-20 18:35.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.08 [info     ] FQE_20220420183458: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001682445060375125, 'time_algorithm_update': 0.005058341941168142, 'loss': 0.040942005477412495, 'time_step': 0.005302881085595419, 'init_value': -2.874814987182617, 'ave_value': -3.433917779202766, 'soft_opc': nan} step=1720




2022-04-20 18:35.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.10 [info     ] FQE_20220420183458: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001710632512735766, 'time_algorithm_update': 0.005124051903569421, 'loss': 0.05042205870606352, 'time_step': 0.005372813967771308, 'init_value': -3.167046546936035, 'ave_value': -3.934186699105477, 'soft_opc': nan} step=2064




2022-04-20 18:35.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.12 [info     ] FQE_20220420183458: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017200999481733456, 'time_algorithm_update': 0.00505074096280475, 'loss': 0.06392805186793381, 'time_step': 0.0053020812744318056, 'init_value': -3.7709670066833496, 'ave_value': -4.710054330366689, 'soft_opc': nan} step=2408




2022-04-20 18:35.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.14 [info     ] FQE_20220420183458: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001663946828176809, 'time_algorithm_update': 0.004576588092848312, 'loss': 0.07730078552091538, 'time_step': 0.004819156818611677, 'init_value': -4.090534210205078, 'ave_value': -5.268181232831164, 'soft_opc': nan} step=2752




2022-04-20 18:35.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.15 [info     ] FQE_20220420183458: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001742084359013757, 'time_algorithm_update': 0.005024576602980148, 'loss': 0.08908281089239863, 'time_step': 0.005273602729619935, 'init_value': -4.279025077819824, 'ave_value': -5.682217757407207, 'soft_opc': nan} step=3096




2022-04-20 18:35.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.17 [info     ] FQE_20220420183458: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016918501188588697, 'time_algorithm_update': 0.005029518243878386, 'loss': 0.10449015329656906, 'time_step': 0.005274783733279206, 'init_value': -4.606313705444336, 'ave_value': -6.328211396157759, 'soft_opc': nan} step=3440




2022-04-20 18:35.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.19 [info     ] FQE_20220420183458: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016954957052718763, 'time_algorithm_update': 0.005099846180095229, 'loss': 0.11500074658651165, 'time_step': 0.00534353353256403, 'init_value': -4.940701961517334, 'ave_value': -6.9019226010653885, 'soft_opc': nan} step=3784




2022-04-20 18:35.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.21 [info     ] FQE_20220420183458: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001714638499326484, 'time_algorithm_update': 0.0046509788479915885, 'loss': 0.1281090982171685, 'time_step': 0.004898738722468532, 'init_value': -5.253808975219727, 'ave_value': -7.530728813146266, 'soft_opc': nan} step=4128




2022-04-20 18:35.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.23 [info     ] FQE_20220420183458: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017026690549628678, 'time_algorithm_update': 0.004990864631741546, 'loss': 0.14415057087966868, 'time_step': 0.005240274030108785, 'init_value': -5.301687240600586, 'ave_value': -7.8947048534622395, 'soft_opc': nan} step=4472




2022-04-20 18:35.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.25 [info     ] FQE_20220420183458: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001728624798530756, 'time_algorithm_update': 0.0050098154433937956, 'loss': 0.15479160807918496, 'time_step': 0.005258782658466073, 'init_value': -5.66317892074585, 'ave_value': -8.4538423579182, 'soft_opc': nan} step=4816




2022-04-20 18:35.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.27 [info     ] FQE_20220420183458: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017544765805089198, 'time_algorithm_update': 0.005062295253886733, 'loss': 0.16732200362373056, 'time_step': 0.0053159227204877275, 'init_value': -6.18005895614624, 'ave_value': -9.10551633788902, 'soft_opc': nan} step=5160




2022-04-20 18:35.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.29 [info     ] FQE_20220420183458: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001714174137559048, 'time_algorithm_update': 0.005198866128921509, 'loss': 0.175673293577897, 'time_step': 0.005446596894153329, 'init_value': -6.4788312911987305, 'ave_value': -9.647675029327253, 'soft_opc': nan} step=5504




2022-04-20 18:35.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.31 [info     ] FQE_20220420183458: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016778638196545979, 'time_algorithm_update': 0.004561817230180253, 'loss': 0.19058763210358487, 'time_step': 0.004805858745131382, 'init_value': -6.456640720367432, 'ave_value': -9.753486635844718, 'soft_opc': nan} step=5848




2022-04-20 18:35.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.33 [info     ] FQE_20220420183458: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017136266065198322, 'time_algorithm_update': 0.00513990604600241, 'loss': 0.19357880375477984, 'time_step': 0.005388031865275184, 'init_value': -6.821107864379883, 'ave_value': -10.179345473477634, 'soft_opc': nan} step=6192




2022-04-20 18:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.35 [info     ] FQE_20220420183458: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017185751781907191, 'time_algorithm_update': 0.005009775244912436, 'loss': 0.19706237252238532, 'time_step': 0.005258747311525567, 'init_value': -7.173645973205566, 'ave_value': -10.622982332430075, 'soft_opc': nan} step=6536




2022-04-20 18:35.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.37 [info     ] FQE_20220420183458: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001720751440802286, 'time_algorithm_update': 0.00500163990397786, 'loss': 0.19830005391782454, 'time_step': 0.005252402189166047, 'init_value': -7.512997627258301, 'ave_value': -10.963864481436541, 'soft_opc': nan} step=6880




2022-04-20 18:35.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.39 [info     ] FQE_20220420183458: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017418625742890114, 'time_algorithm_update': 0.0050637174484341645, 'loss': 0.20493516199247436, 'time_step': 0.0053155775680098424, 'init_value': -8.267946243286133, 'ave_value': -11.676380643787216, 'soft_opc': nan} step=7224




2022-04-20 18:35.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.41 [info     ] FQE_20220420183458: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017213405564773913, 'time_algorithm_update': 0.004790472429852153, 'loss': 0.201573210243179, 'time_step': 0.005040374606154686, 'init_value': -8.256288528442383, 'ave_value': -11.784245702205226, 'soft_opc': nan} step=7568




2022-04-20 18:35.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.43 [info     ] FQE_20220420183458: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017222415569216707, 'time_algorithm_update': 0.00517407683439033, 'loss': 0.1985045363476803, 'time_step': 0.005425868339316789, 'init_value': -9.0843505859375, 'ave_value': -12.652725027675261, 'soft_opc': nan} step=7912




2022-04-20 18:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.45 [info     ] FQE_20220420183458: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017168286234833474, 'time_algorithm_update': 0.005067781653515128, 'loss': 0.20367029732699657, 'time_step': 0.005317617987477502, 'init_value': -9.499933242797852, 'ave_value': -12.930427853896509, 'soft_opc': nan} step=8256




2022-04-20 18:35.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.47 [info     ] FQE_20220420183458: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00014907537504684095, 'time_algorithm_update': 0.004834315111470777, 'loss': 0.2082141166184704, 'time_step': 0.005051844341810359, 'init_value': -9.771469116210938, 'ave_value': -13.192445103834206, 'soft_opc': nan} step=8600




2022-04-20 18:35.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.48 [info     ] FQE_20220420183458: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012918613677801087, 'time_algorithm_update': 0.004162093234616656, 'loss': 0.2152667560636304, 'time_step': 0.004348867161329402, 'init_value': -10.40118408203125, 'ave_value': -13.708430660378422, 'soft_opc': nan} step=8944




2022-04-20 18:35.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.50 [info     ] FQE_20220420183458: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001262003599211227, 'time_algorithm_update': 0.004689686520155086, 'loss': 0.2188675663661385, 'time_step': 0.004873318727626357, 'init_value': -10.672494888305664, 'ave_value': -13.910952347126864, 'soft_opc': nan} step=9288




2022-04-20 18:35.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.52 [info     ] FQE_20220420183458: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001452045385227647, 'time_algorithm_update': 0.004862185134444126, 'loss': 0.22325180446577453, 'time_step': 0.0050732756769934365, 'init_value': -10.968564987182617, 'ave_value': -14.161920659663156, 'soft_opc': nan} step=9632




2022-04-20 18:35.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.54 [info     ] FQE_20220420183458: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001717029615890148, 'time_algorithm_update': 0.005115091800689697, 'loss': 0.23408226541590033, 'time_step': 0.005362944547520127, 'init_value': -11.345844268798828, 'ave_value': -14.379088141356368, 'soft_opc': nan} step=9976




2022-04-20 18:35.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.56 [info     ] FQE_20220420183458: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016635656356811523, 'time_algorithm_update': 0.0045897115108578706, 'loss': 0.2424503594344525, 'time_step': 0.004831269036891849, 'init_value': -11.810022354125977, 'ave_value': -14.760202547542852, 'soft_opc': nan} step=10320




2022-04-20 18:35.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:35.58 [info     ] FQE_20220420183458: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016770044038462084, 'time_algorithm_update': 0.005046380120654439, 'loss': 0.2547401433897226, 'time_step': 0.005291832740916763, 'init_value': -12.489381790161133, 'ave_value': -15.286209272282399, 'soft_opc': nan} step=10664




2022-04-20 18:35.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.00 [info     ] FQE_20220420183458: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017007977463478265, 'time_algorithm_update': 0.0051272767920826755, 'loss': 0.2636696925700837, 'time_step': 0.00537272664003594, 'init_value': -12.752107620239258, 'ave_value': -15.496241515715866, 'soft_opc': nan} step=11008




2022-04-20 18:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.02 [info     ] FQE_20220420183458: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017300386761510096, 'time_algorithm_update': 0.004989172137060831, 'loss': 0.2742167271966071, 'time_step': 0.0052416892938835676, 'init_value': -13.318794250488281, 'ave_value': -15.968497530938974, 'soft_opc': nan} step=11352




2022-04-20 18:36.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.04 [info     ] FQE_20220420183458: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017049146253009175, 'time_algorithm_update': 0.00502579226050266, 'loss': 0.2819035312561535, 'time_step': 0.005272921434668607, 'init_value': -13.670612335205078, 'ave_value': -16.284826615313488, 'soft_opc': nan} step=11696




2022-04-20 18:36.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.05 [info     ] FQE_20220420183458: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.000168062919794127, 'time_algorithm_update': 0.004589828640915627, 'loss': 0.29240479577484346, 'time_step': 0.004835784435272217, 'init_value': -13.984171867370605, 'ave_value': -16.562647037094393, 'soft_opc': nan} step=12040




2022-04-20 18:36.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.07 [info     ] FQE_20220420183458: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001675846964813942, 'time_algorithm_update': 0.005014762628910153, 'loss': 0.2973448891398432, 'time_step': 0.005256444215774536, 'init_value': -14.323652267456055, 'ave_value': -16.833848953673716, 'soft_opc': nan} step=12384




2022-04-20 18:36.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.09 [info     ] FQE_20220420183458: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016882946324902912, 'time_algorithm_update': 0.005002402982046438, 'loss': 0.3147689322941005, 'time_step': 0.0052458394405453706, 'init_value': -14.717034339904785, 'ave_value': -17.050757848825473, 'soft_opc': nan} step=12728




2022-04-20 18:36.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.11 [info     ] FQE_20220420183458: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016808301903480707, 'time_algorithm_update': 0.005046224178269852, 'loss': 0.324745498175278, 'time_step': 0.005290046680805295, 'init_value': -15.241641998291016, 'ave_value': -17.555816875503456, 'soft_opc': nan} step=13072




2022-04-20 18:36.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.13 [info     ] FQE_20220420183458: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001699862092040306, 'time_algorithm_update': 0.004573971726173578, 'loss': 0.3395685424735813, 'time_step': 0.0048217087291007816, 'init_value': -15.452390670776367, 'ave_value': -17.503505818401504, 'soft_opc': nan} step=13416




2022-04-20 18:36.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.15 [info     ] FQE_20220420183458: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016716261242711268, 'time_algorithm_update': 0.005015599866246068, 'loss': 0.35261641816929157, 'time_step': 0.005259961582893549, 'init_value': -15.705938339233398, 'ave_value': -17.680355041146882, 'soft_opc': nan} step=13760




2022-04-20 18:36.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.17 [info     ] FQE_20220420183458: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016854045002959495, 'time_algorithm_update': 0.005077843056168667, 'loss': 0.3602115239644813, 'time_step': 0.005321285752362983, 'init_value': -16.091726303100586, 'ave_value': -17.9796760972648, 'soft_opc': nan} step=14104




2022-04-20 18:36.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.19 [info     ] FQE_20220420183458: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016850371693455897, 'time_algorithm_update': 0.0050531715847725095, 'loss': 0.37478824541362565, 'time_step': 0.005298531332681346, 'init_value': -16.474279403686523, 'ave_value': -18.273841017447747, 'soft_opc': nan} step=14448




2022-04-20 18:36.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.21 [info     ] FQE_20220420183458: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001708976058072822, 'time_algorithm_update': 0.0049969054931818056, 'loss': 0.39849412607483914, 'time_step': 0.005246400833129883, 'init_value': -16.581249237060547, 'ave_value': -18.32003938872493, 'soft_opc': nan} step=14792




2022-04-20 18:36.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.23 [info     ] FQE_20220420183458: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017019482546074447, 'time_algorithm_update': 0.004614805759385575, 'loss': 0.4003335210429721, 'time_step': 0.004861033240029978, 'init_value': -16.75312042236328, 'ave_value': -18.289446184686007, 'soft_opc': nan} step=15136




2022-04-20 18:36.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.25 [info     ] FQE_20220420183458: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017003403153530386, 'time_algorithm_update': 0.00509040023005286, 'loss': 0.40307901013469283, 'time_step': 0.005336920189303021, 'init_value': -16.69959259033203, 'ave_value': -18.185933073380173, 'soft_opc': nan} step=15480




2022-04-20 18:36.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.27 [info     ] FQE_20220420183458: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016793262126833894, 'time_algorithm_update': 0.005111299281896547, 'loss': 0.42141939746757406, 'time_step': 0.0053553504999293835, 'init_value': -17.352237701416016, 'ave_value': -18.643026918189616, 'soft_opc': nan} step=15824




2022-04-20 18:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.29 [info     ] FQE_20220420183458: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016765885574873105, 'time_algorithm_update': 0.0050205269525217455, 'loss': 0.43328163024514565, 'time_step': 0.00526164783987888, 'init_value': -17.38439178466797, 'ave_value': -18.479170679697045, 'soft_opc': nan} step=16168




2022-04-20 18:36.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.31 [info     ] FQE_20220420183458: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017227890879608864, 'time_algorithm_update': 0.004646260377972625, 'loss': 0.4446937348693609, 'time_step': 0.004895422347756319, 'init_value': -17.828306198120117, 'ave_value': -18.927745184825884, 'soft_opc': nan} step=16512




2022-04-20 18:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.32 [info     ] FQE_20220420183458: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016918154649956283, 'time_algorithm_update': 0.004961932121321212, 'loss': 0.45884581749749825, 'time_step': 0.005207907321841218, 'init_value': -17.769926071166992, 'ave_value': -18.85422185381354, 'soft_opc': nan} step=16856




2022-04-20 18:36.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:36.34 [info     ] FQE_20220420183458: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017026898472808128, 'time_algorithm_update': 0.005066765602244888, 'loss': 0.4703801705848512, 'time_step': 0.005312461492627166, 'init_value': -17.992992401123047, 'ave_value': -18.947956772836612, 'soft_opc': nan} step=17200




2022-04-20 18:36.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183458/model_17200.pt
search iteration:  37
using hyper params:  [0.009099281399429816, 0.004204548023999208, 1.5837638050927432e-05, 3]
2022-04-20 18:36.34 [debug    ] RoundIterator is selected.
2022-04-20 18:36.34 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420183634
2022-04-20 18:36.34 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:36.35 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:36.35 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:36.35 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0090992813994

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.38 [info     ] TD3PlusBC_20220420183634: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00034909290179871677, 'time_algorithm_update': 0.008727417354695281, 'critic_loss': 2.8120572211885313, 'actor_loss': 2.409810014635499, 'time_step': 0.009156206895036307, 'td_error': 0.8252863293015728, 'init_value': -4.370156764984131, 'ave_value': -2.4363919979611595} step=342
2022-04-20 18:36.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.42 [info     ] TD3PlusBC_20220420183634: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003476003457231131, 'time_algorithm_update': 0.008490825257106134, 'critic_loss': 1.1637245909860956, 'actor_loss': 2.276061016216613, 'time_step': 0.00890945133410002, 'td_error': 0.8617088756101254, 'init_value': -6.296570301055908, 'ave_value': -3.5349965441402946} step=684
2022-04-20 18:36.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.46 [info     ] TD3PlusBC_20220420183634: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035472432075188176, 'time_algorithm_update': 0.00899446638006913, 'critic_loss': 1.7462081561485927, 'actor_loss': 2.276119675552636, 'time_step': 0.00943159290224488, 'td_error': 0.9341597464817334, 'init_value': -8.334582328796387, 'ave_value': -4.734835866274468} step=1026
2022-04-20 18:36.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.49 [info     ] TD3PlusBC_20220420183634: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.000355739342538934, 'time_algorithm_update': 0.008587193767926847, 'critic_loss': 2.366732943824857, 'actor_loss': 2.2716153462727866, 'time_step': 0.009019522639045938, 'td_error': 1.0262255696589526, 'init_value': -10.398221969604492, 'ave_value': -5.900244615203239} step=1368
2022-04-20 18:36.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.53 [info     ] TD3PlusBC_20220420183634: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035088034401163025, 'time_algorithm_update': 0.008858369107831988, 'critic_loss': 3.1568706825114132, 'actor_loss': 2.279028514672441, 'time_step': 0.009286428055568048, 'td_error': 1.1539498931632917, 'init_value': -12.451387405395508, 'ave_value': -7.103559199287263} step=1710
2022-04-20 18:36.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.57 [info     ] TD3PlusBC_20220420183634: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003463022890146713, 'time_algorithm_update': 0.009029891058715463, 'critic_loss': 3.878820730928789, 'actor_loss': 2.276932804208053, 'time_step': 0.009454581472608779, 'td_error': 1.3003055362221942, 'init_value': -14.605862617492676, 'ave_value': -8.277779809311872} step=2052
2022-04-20 18:36.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.00 [info     ] TD3PlusBC_20220420183634: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035752120770906145, 'time_algorithm_update': 0.00860808049029077, 'critic_loss': 4.8304192047370105, 'actor_loss': 2.282665525960643, 'time_step': 0.009047152006138138, 'td_error': 1.4663790099690721, 'init_value': -16.8800106048584, 'ave_value': -9.596075609722552} step=2394
2022-04-20 18:37.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.04 [info     ] TD3PlusBC_20220420183634: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00035620293422052035, 'time_algorithm_update': 0.00907877244447407, 'critic_loss': 6.089054074552324, 'actor_loss': 2.2780359711563376, 'time_step': 0.0095134569190399, 'td_error': 1.690136708573078, 'init_value': -18.95039176940918, 'ave_value': -10.806194823292705} step=2736
2022-04-20 18:37.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.08 [info     ] TD3PlusBC_20220420183634: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003542509692454199, 'time_algorithm_update': 0.008909477825053254, 'critic_loss': 7.110208572002879, 'actor_loss': 2.286005536018059, 'time_step': 0.009345762910898666, 'td_error': 1.8287400847780089, 'init_value': -20.95505142211914, 'ave_value': -11.991578264073134} step=3078
2022-04-20 18:37.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.11 [info     ] TD3PlusBC_20220420183634: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035718937366329437, 'time_algorithm_update': 0.008860023398148386, 'critic_loss': 8.614131364906044, 'actor_loss': 2.2821257937024213, 'time_step': 0.00929358270433214, 'td_error': 1.9757348687132459, 'init_value': -23.470365524291992, 'ave_value': -13.199603802602368} step=3420
2022-04-20 18:37.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.15 [info     ] TD3PlusBC_20220420183634: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00035731346286528294, 'time_algorithm_update': 0.008899816295556855, 'critic_loss': 9.998231070083484, 'actor_loss': 2.2880180091188667, 'time_step': 0.009338396334508706, 'td_error': 2.1562668933627167, 'init_value': -25.119853973388672, 'ave_value': -14.255529211684433} step=3762
2022-04-20 18:37.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.19 [info     ] TD3PlusBC_20220420183634: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00035097794226038526, 'time_algorithm_update': 0.008499383926391602, 'critic_loss': 11.751248619012665, 'actor_loss': 2.2889221439584655, 'time_step': 0.0089217898441337, 'td_error': 2.3605378668711334, 'init_value': -27.197973251342773, 'ave_value': -15.337888837237422} step=4104
2022-04-20 18:37.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.22 [info     ] TD3PlusBC_20220420183634: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035485886690909404, 'time_algorithm_update': 0.008980707815516065, 'critic_loss': 13.554179704677292, 'actor_loss': 2.29343304996602, 'time_step': 0.009407120141369558, 'td_error': 2.544830986467771, 'init_value': -28.928112030029297, 'ave_value': -16.4606648141165} step=4446
2022-04-20 18:37.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.26 [info     ] TD3PlusBC_20220420183634: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035160047966137267, 'time_algorithm_update': 0.008988407620212488, 'critic_loss': 15.438731808411447, 'actor_loss': 2.2875479480676484, 'time_step': 0.009409088837473016, 'td_error': 2.886871392823392, 'init_value': -30.76910400390625, 'ave_value': -17.514178452922685} step=4788
2022-04-20 18:37.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.30 [info     ] TD3PlusBC_20220420183634: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035079320271809894, 'time_algorithm_update': 0.008577119537264283, 'critic_loss': 17.48062270984315, 'actor_loss': 2.2882265863362807, 'time_step': 0.009000705696685969, 'td_error': 2.9811442929812135, 'init_value': -31.990066528320312, 'ave_value': -18.45708016981765} step=5130
2022-04-20 18:37.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.33 [info     ] TD3PlusBC_20220420183634: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035214493846335605, 'time_algorithm_update': 0.00891215689698158, 'critic_loss': 19.737384609311643, 'actor_loss': 2.2870765889597218, 'time_step': 0.009337802379451997, 'td_error': 3.1580974182235972, 'init_value': -34.67924118041992, 'ave_value': -19.627045020176567} step=5472
2022-04-20 18:37.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.37 [info     ] TD3PlusBC_20220420183634: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003530749103479218, 'time_algorithm_update': 0.008565064062151993, 'critic_loss': 22.009713515900728, 'actor_loss': 2.289601793066103, 'time_step': 0.008988262617100052, 'td_error': 3.3661681929586877, 'init_value': -36.20191192626953, 'ave_value': -20.504774149942154} step=5814
2022-04-20 18:37.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.41 [info     ] TD3PlusBC_20220420183634: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003554974383080912, 'time_algorithm_update': 0.008869514827839813, 'critic_loss': 24.409794035013658, 'actor_loss': 2.2868356509515415, 'time_step': 0.009302170653092233, 'td_error': 3.584058149279191, 'init_value': -37.20130920410156, 'ave_value': -21.32992302498113} step=6156
2022-04-20 18:37.41 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.44 [info     ] TD3PlusBC_20220420183634: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003615304043418483, 'time_algorithm_update': 0.009033037905107465, 'critic_loss': 26.93037091640004, 'actor_loss': 2.293211117125394, 'time_step': 0.009471092307776735, 'td_error': 3.830112002616059, 'init_value': -39.875038146972656, 'ave_value': -22.419960015501644} step=6498
2022-04-20 18:37.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.48 [info     ] TD3PlusBC_20220420183634: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035350503977279215, 'time_algorithm_update': 0.008482011438113207, 'critic_loss': 29.491898310811894, 'actor_loss': 2.2935877487673397, 'time_step': 0.008908939640424406, 'td_error': 4.051305677077562, 'init_value': -41.30936813354492, 'ave_value': -23.25290480284121} step=6840
2022-04-20 18:37.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.52 [info     ] TD3PlusBC_20220420183634: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035698302308021234, 'time_algorithm_update': 0.008903884748269243, 'critic_loss': 32.38108403222603, 'actor_loss': 2.2914904850965354, 'time_step': 0.009332576690361513, 'td_error': 4.295061298240391, 'init_value': -42.5571174621582, 'ave_value': -23.990823918755442} step=7182
2022-04-20 18:37.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.55 [info     ] TD3PlusBC_20220420183634: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00034761916824251586, 'time_algorithm_update': 0.008532372831601149, 'critic_loss': 34.95147883543494, 'actor_loss': 2.2931161930686548, 'time_step': 0.008953535765932318, 'td_error': 4.4840953480291885, 'init_value': -44.23313903808594, 'ave_value': -25.03044684481585} step=7524
2022-04-20 18:37.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.59 [info     ] TD3PlusBC_20220420183634: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003588053218105383, 'time_algorithm_update': 0.008915691347847208, 'critic_loss': 37.412779713234706, 'actor_loss': 2.291750670873631, 'time_step': 0.009351854435881676, 'td_error': 4.7001954435246125, 'init_value': -45.221595764160156, 'ave_value': -25.8670717666132} step=7866
2022-04-20 18:37.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.03 [info     ] TD3PlusBC_20220420183634: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003575386359677677, 'time_algorithm_update': 0.008867759453622918, 'critic_loss': 39.84456887161522, 'actor_loss': 2.295438194832607, 'time_step': 0.009301192579213639, 'td_error': 5.0596102622099295, 'init_value': -47.564971923828125, 'ave_value': -26.801874874174963} step=8208
2022-04-20 18:38.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.06 [info     ] TD3PlusBC_20220420183634: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003579980448672646, 'time_algorithm_update': 0.008543598024468673, 'critic_loss': 42.67184101907831, 'actor_loss': 2.296245321195725, 'time_step': 0.008981025706954866, 'td_error': 5.024965175931704, 'init_value': -47.72956466674805, 'ave_value': -27.2855130094226} step=8550
2022-04-20 18:38.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.10 [info     ] TD3PlusBC_20220420183634: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003522390510603698, 'time_algorithm_update': 0.008909364192806489, 'critic_loss': 45.03622607738651, 'actor_loss': 2.2969368549815394, 'time_step': 0.009334237951981393, 'td_error': 5.436302298149148, 'init_value': -49.745975494384766, 'ave_value': -28.260211901452124} step=8892
2022-04-20 18:38.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.14 [info     ] TD3PlusBC_20220420183634: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003545249414722822, 'time_algorithm_update': 0.008957314212419833, 'critic_loss': 47.59151448143853, 'actor_loss': 2.29271077691463, 'time_step': 0.00938984455420957, 'td_error': 5.446019390580998, 'init_value': -50.25756072998047, 'ave_value': -28.808390801036563} step=9234
2022-04-20 18:38.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.17 [info     ] TD3PlusBC_20220420183634: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003619751735040319, 'time_algorithm_update': 0.008789807732342279, 'critic_loss': 49.86654377541347, 'actor_loss': 2.2941454600172433, 'time_step': 0.009225769349706103, 'td_error': 5.642542905862654, 'init_value': -51.243858337402344, 'ave_value': -29.66160407958888} step=9576
2022-04-20 18:38.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.21 [info     ] TD3PlusBC_20220420183634: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00036189848916572435, 'time_algorithm_update': 0.00883216188665022, 'critic_loss': 52.492078691895244, 'actor_loss': 2.29060615433587, 'time_step': 0.009268478343361303, 'td_error': 5.825224813267991, 'init_value': -52.93357467651367, 'ave_value': -30.54859266532778} step=9918
2022-04-20 18:38.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.25 [info     ] TD3PlusBC_20220420183634: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003631073131895902, 'time_algorithm_update': 0.008587430095114904, 'critic_loss': 54.95533681055259, 'actor_loss': 2.293688736463848, 'time_step': 0.00902895620691846, 'td_error': 5.962831173751501, 'init_value': -53.778480529785156, 'ave_value': -31.18530796444438} step=10260
2022-04-20 18:38.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.28 [info     ] TD3PlusBC_20220420183634: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00036158896329110127, 'time_algorithm_update': 0.009053393414146021, 'critic_loss': 57.30465895669502, 'actor_loss': 2.2973713526251722, 'time_step': 0.009497977139657004, 'td_error': 6.169646845564758, 'init_value': -53.86663818359375, 'ave_value': -31.50971836181601} step=10602
2022-04-20 18:38.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.32 [info     ] TD3PlusBC_20220420183634: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003638874020492821, 'time_algorithm_update': 0.008939796721029002, 'critic_loss': 59.86106405202408, 'actor_loss': 2.292380186549404, 'time_step': 0.009380938714010674, 'td_error': 6.387942632195382, 'init_value': -55.7312126159668, 'ave_value': -32.37004534461628} step=10944
2022-04-20 18:38.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.36 [info     ] TD3PlusBC_20220420183634: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00035744312911005743, 'time_algorithm_update': 0.008572275178474292, 'critic_loss': 62.20222119560019, 'actor_loss': 2.2959213912138465, 'time_step': 0.009010634924236097, 'td_error': 6.538248866409818, 'init_value': -56.327972412109375, 'ave_value': -32.977796841256726} step=11286
2022-04-20 18:38.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.39 [info     ] TD3PlusBC_20220420183634: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003561583178782324, 'time_algorithm_update': 0.008944822333709539, 'critic_loss': 64.4420862476728, 'actor_loss': 2.302271572469968, 'time_step': 0.009379584189744024, 'td_error': 6.606574277090279, 'init_value': -56.53934860229492, 'ave_value': -33.6443243901914} step=11628
2022-04-20 18:38.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.43 [info     ] TD3PlusBC_20220420183634: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035577350192599825, 'time_algorithm_update': 0.008518558496620223, 'critic_loss': 66.80612266830533, 'actor_loss': 2.300969744286342, 'time_step': 0.008951446466278611, 'td_error': 6.802365634290833, 'init_value': -58.26348876953125, 'ave_value': -34.27957532259409} step=11970
2022-04-20 18:38.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.47 [info     ] TD3PlusBC_20220420183634: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036340568497864127, 'time_algorithm_update': 0.00906744128779361, 'critic_loss': 69.11894825048614, 'actor_loss': 2.2942375732444185, 'time_step': 0.009506800021344458, 'td_error': 6.926118917464157, 'init_value': -58.844322204589844, 'ave_value': -34.76175640997947} step=12312
2022-04-20 18:38.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.51 [info     ] TD3PlusBC_20220420183634: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035956310249908623, 'time_algorithm_update': 0.009040088681449667, 'critic_loss': 71.28046614663643, 'actor_loss': 2.2975711864337587, 'time_step': 0.009476406532421447, 'td_error': 7.105929211678709, 'init_value': -59.241676330566406, 'ave_value': -35.3580170625361} step=12654
2022-04-20 18:38.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.54 [info     ] TD3PlusBC_20220420183634: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003621654900891042, 'time_algorithm_update': 0.008474478247570015, 'critic_loss': 73.49385452270508, 'actor_loss': 2.2959308819464077, 'time_step': 0.008916281120121827, 'td_error': 7.024127073816953, 'init_value': -59.375946044921875, 'ave_value': -35.803769030293516} step=12996
2022-04-20 18:38.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.58 [info     ] TD3PlusBC_20220420183634: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003628591347856131, 'time_algorithm_update': 0.008920549649244163, 'critic_loss': 75.71906443367227, 'actor_loss': 2.294879541062472, 'time_step': 0.009363278310898452, 'td_error': 7.367778945772204, 'init_value': -61.396141052246094, 'ave_value': -36.521560901936965} step=13338
2022-04-20 18:38.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.01 [info     ] TD3PlusBC_20220420183634: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003598663542005751, 'time_algorithm_update': 0.008628666052344249, 'critic_loss': 77.68753415381002, 'actor_loss': 2.2982350636643973, 'time_step': 0.009063683058086195, 'td_error': 7.735621615534374, 'init_value': -61.983001708984375, 'ave_value': -37.05032745791758} step=13680
2022-04-20 18:39.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.05 [info     ] TD3PlusBC_20220420183634: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00036063807749608804, 'time_algorithm_update': 0.008960187783715321, 'critic_loss': 79.82460307517248, 'actor_loss': 2.3000067646740474, 'time_step': 0.009400473003498992, 'td_error': 7.777520020061694, 'init_value': -63.062965393066406, 'ave_value': -37.7897035640609} step=14022
2022-04-20 18:39.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.09 [info     ] TD3PlusBC_20220420183634: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003560614167598256, 'time_algorithm_update': 0.00890844328361645, 'critic_loss': 81.93221687852291, 'actor_loss': 2.3050957939081025, 'time_step': 0.009342770827443976, 'td_error': 8.08944635611022, 'init_value': -64.29296875, 'ave_value': -38.33584945204307} step=14364
2022-04-20 18:39.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.12 [info     ] TD3PlusBC_20220420183634: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003560879077130591, 'time_algorithm_update': 0.008496306793034425, 'critic_loss': 83.46087726793792, 'actor_loss': 2.3042784504026, 'time_step': 0.008929323034676893, 'td_error': 7.884179496347024, 'init_value': -62.32646560668945, 'ave_value': -38.398906603971795} step=14706
2022-04-20 18:39.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.16 [info     ] TD3PlusBC_20220420183634: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00036195216820253964, 'time_algorithm_update': 0.00892450307544909, 'critic_loss': 85.21448058412786, 'actor_loss': 2.299676006997538, 'time_step': 0.009366216018185979, 'td_error': 8.372296200295345, 'init_value': -65.51541900634766, 'ave_value': -39.24152854901252} step=15048
2022-04-20 18:39.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.20 [info     ] TD3PlusBC_20220420183634: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00035785304175482853, 'time_algorithm_update': 0.008998286654377541, 'critic_loss': 87.25229017915781, 'actor_loss': 2.298697862011647, 'time_step': 0.009432912569994118, 'td_error': 8.518815713397318, 'init_value': -66.21015167236328, 'ave_value': -39.875959060833296} step=15390
2022-04-20 18:39.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.24 [info     ] TD3PlusBC_20220420183634: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00036279290740252933, 'time_algorithm_update': 0.008820933905261301, 'critic_loss': 89.04079992729321, 'actor_loss': 2.3008110718420376, 'time_step': 0.009262020127815112, 'td_error': 8.440056717864922, 'init_value': -65.26066589355469, 'ave_value': -39.93790076706105} step=15732
2022-04-20 18:39.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.27 [info     ] TD3PlusBC_20220420183634: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003555950365568462, 'time_algorithm_update': 0.008754781812255145, 'critic_loss': 90.91529285140902, 'actor_loss': 2.3037940298604687, 'time_step': 0.00918750804767274, 'td_error': 8.593865942971831, 'init_value': -66.1474838256836, 'ave_value': -40.48436211353528} step=16074
2022-04-20 18:39.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.31 [info     ] TD3PlusBC_20220420183634: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003582329777946249, 'time_algorithm_update': 0.00846318125027662, 'critic_loss': 92.41703994929442, 'actor_loss': 2.3029167889154447, 'time_step': 0.008897370065164846, 'td_error': 8.436377248579657, 'init_value': -64.7466049194336, 'ave_value': -40.608324723672794} step=16416
2022-04-20 18:39.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.34 [info     ] TD3PlusBC_20220420183634: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003536876879240337, 'time_algorithm_update': 0.008788856149416917, 'critic_loss': 94.30437147129349, 'actor_loss': 2.2997244756821305, 'time_step': 0.009224522183513084, 'td_error': 8.791921961987331, 'init_value': -66.22084045410156, 'ave_value': -41.22075688316982} step=16758
2022-04-20 18:39.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.38 [info     ] TD3PlusBC_20220420183634: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003589789072672526, 'time_algorithm_update': 0.009017526754858897, 'critic_loss': 95.9559854987072, 'actor_loss': 2.3058472795096057, 'time_step': 0.00945661639609532, 'td_error': 8.956391019741408, 'init_value': -66.13682556152344, 'ave_value': -41.56673553608148} step=17100
2022-04-20 18:39.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420183634/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.8

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:39.39 [info     ] FQE_20220420183938: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016144439998993091, 'time_algorithm_update': 0.004071863357630153, 'loss': 0.007229868665937398, 'time_step': 0.004307505774632686, 'init_value': -0.46281611919403076, 'ave_value': -0.381031063181144, 'soft_opc': nan} step=177




2022-04-20 18:39.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.40 [info     ] FQE_20220420183938: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015977951092908612, 'time_algorithm_update': 0.0049782513225145935, 'loss': 0.005643224760647969, 'time_step': 0.0052097827027746514, 'init_value': -0.5923503637313843, 'ave_value': -0.4780989836644124, 'soft_opc': nan} step=354




2022-04-20 18:39.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.41 [info     ] FQE_20220420183938: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.0001604408867615091, 'time_algorithm_update': 0.005001881701798089, 'loss': 0.0051158689236068455, 'time_step': 0.005234459699210474, 'init_value': -0.6207376718521118, 'ave_value': -0.4849579616083397, 'soft_opc': nan} step=531




2022-04-20 18:39.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.42 [info     ] FQE_20220420183938: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016263110489495058, 'time_algorithm_update': 0.005005113149093369, 'loss': 0.004793906648837998, 'time_step': 0.005239572902183748, 'init_value': -0.6193293333053589, 'ave_value': -0.46722997616271716, 'soft_opc': nan} step=708




2022-04-20 18:39.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.43 [info     ] FQE_20220420183938: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016252603908043124, 'time_algorithm_update': 0.005051541463129938, 'loss': 0.004638962829117216, 'time_step': 0.005286673368033716, 'init_value': -0.6579472422599792, 'ave_value': -0.5150640106594956, 'soft_opc': nan} step=885




2022-04-20 18:39.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.44 [info     ] FQE_20220420183938: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016250852811134468, 'time_algorithm_update': 0.005045638919550147, 'loss': 0.004555612874853998, 'time_step': 0.005280990385066318, 'init_value': -0.6929877400398254, 'ave_value': -0.547453035132305, 'soft_opc': nan} step=1062




2022-04-20 18:39.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.45 [info     ] FQE_20220420183938: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001631995378914526, 'time_algorithm_update': 0.0049475290007510424, 'loss': 0.004286749449579603, 'time_step': 0.005185233671112922, 'init_value': -0.6790423393249512, 'ave_value': -0.534225294521979, 'soft_opc': nan} step=1239




2022-04-20 18:39.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.46 [info     ] FQE_20220420183938: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016297054829570533, 'time_algorithm_update': 0.005094458154365841, 'loss': 0.004134747490423632, 'time_step': 0.0053323514043948075, 'init_value': -0.6530559062957764, 'ave_value': -0.5026944598367622, 'soft_opc': nan} step=1416




2022-04-20 18:39.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.47 [info     ] FQE_20220420183938: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016188890920520503, 'time_algorithm_update': 0.004926382485082594, 'loss': 0.0040540545080435895, 'time_step': 0.005161125107673602, 'init_value': -0.6673725247383118, 'ave_value': -0.5306783722685622, 'soft_opc': nan} step=1593




2022-04-20 18:39.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.48 [info     ] FQE_20220420183938: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001596838740979211, 'time_algorithm_update': 0.004116447631922145, 'loss': 0.004256869865565608, 'time_step': 0.0043476813257077316, 'init_value': -0.6447571516036987, 'ave_value': -0.5095495901755742, 'soft_opc': nan} step=1770




2022-04-20 18:39.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.49 [info     ] FQE_20220420183938: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.000164064310364804, 'time_algorithm_update': 0.004994152629442808, 'loss': 0.004348577197514854, 'time_step': 0.005233577415768036, 'init_value': -0.6590366363525391, 'ave_value': -0.49703963346074564, 'soft_opc': nan} step=1947




2022-04-20 18:39.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.50 [info     ] FQE_20220420183938: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016327901075115312, 'time_algorithm_update': 0.005100328370002703, 'loss': 0.004526160264093071, 'time_step': 0.005341061091018936, 'init_value': -0.7142197489738464, 'ave_value': -0.5318281270915994, 'soft_opc': nan} step=2124




2022-04-20 18:39.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.51 [info     ] FQE_20220420183938: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016017822222521075, 'time_algorithm_update': 0.004980328392847783, 'loss': 0.004709191123776053, 'time_step': 0.005216861175278486, 'init_value': -0.7263469696044922, 'ave_value': -0.5207518380426162, 'soft_opc': nan} step=2301




2022-04-20 18:39.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.52 [info     ] FQE_20220420183938: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016232533643474687, 'time_algorithm_update': 0.0050501082576600844, 'loss': 0.005193442219373702, 'time_step': 0.005284302652218921, 'init_value': -0.7673892378807068, 'ave_value': -0.5278386140745637, 'soft_opc': nan} step=2478




2022-04-20 18:39.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.53 [info     ] FQE_20220420183938: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.0001601795692228328, 'time_algorithm_update': 0.005066055362507448, 'loss': 0.005743932091738861, 'time_step': 0.005300399273802331, 'init_value': -0.78510981798172, 'ave_value': -0.5447072909473701, 'soft_opc': nan} step=2655




2022-04-20 18:39.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.54 [info     ] FQE_20220420183938: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016917886033570025, 'time_algorithm_update': 0.005045273883194573, 'loss': 0.006499030061676509, 'time_step': 0.005286688185007559, 'init_value': -0.889411449432373, 'ave_value': -0.6005822998983366, 'soft_opc': nan} step=2832




2022-04-20 18:39.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.55 [info     ] FQE_20220420183938: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015914642204672603, 'time_algorithm_update': 0.005018184414017672, 'loss': 0.0071199031870307055, 'time_step': 0.00525059942471779, 'init_value': -0.9837706089019775, 'ave_value': -0.6599496188397344, 'soft_opc': nan} step=3009




2022-04-20 18:39.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.56 [info     ] FQE_20220420183938: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015944006752833135, 'time_algorithm_update': 0.005025066224868688, 'loss': 0.007583300221979745, 'time_step': 0.00525885517314329, 'init_value': -0.9861858487129211, 'ave_value': -0.6343117184165094, 'soft_opc': nan} step=3186




2022-04-20 18:39.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.57 [info     ] FQE_20220420183938: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00015884065358652232, 'time_algorithm_update': 0.004475888559373759, 'loss': 0.008561724187848343, 'time_step': 0.004705500468022406, 'init_value': -1.0729889869689941, 'ave_value': -0.6660486554497623, 'soft_opc': nan} step=3363




2022-04-20 18:39.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.58 [info     ] FQE_20220420183938: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00016481189404503774, 'time_algorithm_update': 0.005157473397120244, 'loss': 0.009482199198561116, 'time_step': 0.005399130158505197, 'init_value': -1.0823134183883667, 'ave_value': -0.643973386674202, 'soft_opc': nan} step=3540




2022-04-20 18:39.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:39.59 [info     ] FQE_20220420183938: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016399291949083575, 'time_algorithm_update': 0.005132009754073148, 'loss': 0.010098890142283786, 'time_step': 0.005369120398483708, 'init_value': -1.1643673181533813, 'ave_value': -0.6723995911997852, 'soft_opc': nan} step=3717




2022-04-20 18:39.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.00 [info     ] FQE_20220420183938: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001661386867027498, 'time_algorithm_update': 0.0049805061965338925, 'loss': 0.010847087164204253, 'time_step': 0.005221152709702314, 'init_value': -1.1668235063552856, 'ave_value': -0.6553893803498587, 'soft_opc': nan} step=3894




2022-04-20 18:40.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.01 [info     ] FQE_20220420183938: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015953839835474047, 'time_algorithm_update': 0.005049071069491111, 'loss': 0.012360583884920399, 'time_step': 0.005280415217081706, 'init_value': -1.2814401388168335, 'ave_value': -0.7232477688300494, 'soft_opc': nan} step=4071




2022-04-20 18:40.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.02 [info     ] FQE_20220420183938: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016174747445489053, 'time_algorithm_update': 0.004915614586091984, 'loss': 0.012747106167335211, 'time_step': 0.005152110999586892, 'init_value': -1.3197914361953735, 'ave_value': -0.7252278210231671, 'soft_opc': nan} step=4248




2022-04-20 18:40.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.03 [info     ] FQE_20220420183938: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015989804671982588, 'time_algorithm_update': 0.005029150321658722, 'loss': 0.01358417872376319, 'time_step': 0.00526309552165748, 'init_value': -1.4082013368606567, 'ave_value': -0.7684045883292729, 'soft_opc': nan} step=4425




2022-04-20 18:40.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.04 [info     ] FQE_20220420183938: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00016671520168498412, 'time_algorithm_update': 0.0049633481408243125, 'loss': 0.014719650766280833, 'time_step': 0.00520528507771465, 'init_value': -1.4530401229858398, 'ave_value': -0.7754753820564564, 'soft_opc': nan} step=4602




2022-04-20 18:40.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.05 [info     ] FQE_20220420183938: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00015838806238551597, 'time_algorithm_update': 0.004678171233268781, 'loss': 0.0161475915709079, 'time_step': 0.004910180797684664, 'init_value': -1.5061956644058228, 'ave_value': -0.7947758658556967, 'soft_opc': nan} step=4779




2022-04-20 18:40.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.06 [info     ] FQE_20220420183938: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016222431161309366, 'time_algorithm_update': 0.004807429125080001, 'loss': 0.017104633034754145, 'time_step': 0.005042480210126456, 'init_value': -1.6579508781433105, 'ave_value': -0.8974386702602108, 'soft_opc': nan} step=4956




2022-04-20 18:40.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.07 [info     ] FQE_20220420183938: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016345950843250684, 'time_algorithm_update': 0.004960990894985738, 'loss': 0.018614311548644454, 'time_step': 0.005201749208956789, 'init_value': -1.7096282243728638, 'ave_value': -0.8941213014068546, 'soft_opc': nan} step=5133




2022-04-20 18:40.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.08 [info     ] FQE_20220420183938: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016774161387298067, 'time_algorithm_update': 0.005020349039196295, 'loss': 0.019663332818775824, 'time_step': 0.005261865712828555, 'init_value': -1.7924013137817383, 'ave_value': -0.9207222945395264, 'soft_opc': nan} step=5310




2022-04-20 18:40.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.09 [info     ] FQE_20220420183938: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016560392864679887, 'time_algorithm_update': 0.004978538233008089, 'loss': 0.020628853889818877, 'time_step': 0.005220321612169514, 'init_value': -1.7629667520523071, 'ave_value': -0.8743431776817944, 'soft_opc': nan} step=5487




2022-04-20 18:40.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.10 [info     ] FQE_20220420183938: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016271057775465108, 'time_algorithm_update': 0.005052751066994533, 'loss': 0.021786841564538157, 'time_step': 0.005289638109799833, 'init_value': -1.9034854173660278, 'ave_value': -0.9522341357079802, 'soft_opc': nan} step=5664




2022-04-20 18:40.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.11 [info     ] FQE_20220420183938: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016445628667281846, 'time_algorithm_update': 0.005059789129569706, 'loss': 0.022769468595105417, 'time_step': 0.005296639803439211, 'init_value': -2.000410795211792, 'ave_value': -1.0141299348212667, 'soft_opc': nan} step=5841




2022-04-20 18:40.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.12 [info     ] FQE_20220420183938: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001659460660427977, 'time_algorithm_update': 0.005037823639347055, 'loss': 0.023881396941056655, 'time_step': 0.005280090590654794, 'init_value': -2.1228511333465576, 'ave_value': -1.1032302306813968, 'soft_opc': nan} step=6018




2022-04-20 18:40.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.13 [info     ] FQE_20220420183938: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.000164410488753669, 'time_algorithm_update': 0.005055679439824853, 'loss': 0.024556726829601336, 'time_step': 0.005296961735870878, 'init_value': -2.2052791118621826, 'ave_value': -1.1832298355764366, 'soft_opc': nan} step=6195




2022-04-20 18:40.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.14 [info     ] FQE_20220420183938: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016469874624478614, 'time_algorithm_update': 0.004242534691331077, 'loss': 0.026770247240540293, 'time_step': 0.004480312099564547, 'init_value': -2.2629125118255615, 'ave_value': -1.2060250919524793, 'soft_opc': nan} step=6372




2022-04-20 18:40.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.15 [info     ] FQE_20220420183938: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.0001658005902996171, 'time_algorithm_update': 0.005022492112412964, 'loss': 0.027690499075846274, 'time_step': 0.005262085273440948, 'init_value': -2.358825922012329, 'ave_value': -1.2699493382618323, 'soft_opc': nan} step=6549




2022-04-20 18:40.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.16 [info     ] FQE_20220420183938: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00015975795896713344, 'time_algorithm_update': 0.005034553129120735, 'loss': 0.028229399478341858, 'time_step': 0.005270510743566825, 'init_value': -2.510303258895874, 'ave_value': -1.3925617437138482, 'soft_opc': nan} step=6726




2022-04-20 18:40.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.17 [info     ] FQE_20220420183938: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.0001664565781415519, 'time_algorithm_update': 0.005029923498293775, 'loss': 0.029422614908919245, 'time_step': 0.005271040113632289, 'init_value': -2.4932844638824463, 'ave_value': -1.3691333063573272, 'soft_opc': nan} step=6903




2022-04-20 18:40.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.18 [info     ] FQE_20220420183938: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016708832002628994, 'time_algorithm_update': 0.005080384723210739, 'loss': 0.03087760644996444, 'time_step': 0.0053218677219024485, 'init_value': -2.5256948471069336, 'ave_value': -1.3526906333572872, 'soft_opc': nan} step=7080




2022-04-20 18:40.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.19 [info     ] FQE_20220420183938: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016718395685745498, 'time_algorithm_update': 0.005125501061563438, 'loss': 0.03207015374998988, 'time_step': 0.005366652698840125, 'init_value': -2.6545162200927734, 'ave_value': -1.4405360243565686, 'soft_opc': nan} step=7257




2022-04-20 18:40.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.20 [info     ] FQE_20220420183938: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016469470525192002, 'time_algorithm_update': 0.005134459942747644, 'loss': 0.03475199149274346, 'time_step': 0.005377777552200576, 'init_value': -2.6598596572875977, 'ave_value': -1.4099366154186093, 'soft_opc': nan} step=7434




2022-04-20 18:40.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.21 [info     ] FQE_20220420183938: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016503953664316295, 'time_algorithm_update': 0.005043014968182408, 'loss': 0.035933703669520506, 'time_step': 0.00528248824642203, 'init_value': -2.601175308227539, 'ave_value': -1.2896236666755097, 'soft_opc': nan} step=7611




2022-04-20 18:40.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.22 [info     ] FQE_20220420183938: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.0001629665073028392, 'time_algorithm_update': 0.005040884017944336, 'loss': 0.03702046570044516, 'time_step': 0.005276293404358255, 'init_value': -2.7671635150909424, 'ave_value': -1.4027358912804104, 'soft_opc': nan} step=7788




2022-04-20 18:40.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.23 [info     ] FQE_20220420183938: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016340024053713696, 'time_algorithm_update': 0.004140910455736064, 'loss': 0.037619240313642104, 'time_step': 0.004379230704011217, 'init_value': -2.9485456943511963, 'ave_value': -1.4886662069294188, 'soft_opc': nan} step=7965




2022-04-20 18:40.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.24 [info     ] FQE_20220420183938: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.0001612517793299788, 'time_algorithm_update': 0.005087887499965517, 'loss': 0.0388688618931551, 'time_step': 0.005320086991046108, 'init_value': -3.0379390716552734, 'ave_value': -1.5812125209170778, 'soft_opc': nan} step=8142




2022-04-20 18:40.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.25 [info     ] FQE_20220420183938: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.0001656524205611924, 'time_algorithm_update': 0.005031504873502053, 'loss': 0.04097041669288534, 'time_step': 0.005273351561551714, 'init_value': -3.1191766262054443, 'ave_value': -1.5975658965540362, 'soft_opc': nan} step=8319




2022-04-20 18:40.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.26 [info     ] FQE_20220420183938: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016666670977059057, 'time_algorithm_update': 0.0050943827224990065, 'loss': 0.04139031642011372, 'time_step': 0.005334339572884942, 'init_value': -3.114004373550415, 'ave_value': -1.5876966147697544, 'soft_opc': nan} step=8496




2022-04-20 18:40.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.27 [info     ] FQE_20220420183938: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015947104847363833, 'time_algorithm_update': 0.004889379113407458, 'loss': 0.04169564539340383, 'time_step': 0.0051233970512778075, 'init_value': -3.2276268005371094, 'ave_value': -1.6845711624881885, 'soft_opc': nan} step=8673




2022-04-20 18:40.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 18:40.28 [info     ] FQE_20220420183938: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016075877820031118, 'time_algorithm_update': 0.0049242003489348846, 'loss': 0.04292919649545229, 'time_step': 0.005160013834635417, 'init_value': -3.351572275161743, 'ave_value': -1.7700567241579115, 'soft_opc': nan} step=8850




2022-04-20 18:40.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183938/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

start
[ 0.00000000e+00  7.95731469e+08  5.32108923e-02 -3.61999953e-02
 -7.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 239 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.41110892e-01  3.18000047e-02
  1.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  5.97482117e-01]
Read chunk # 240 out of 4999
torch.Size([44400, 6])
2022-04-20 18:40.28 [debug    ] RoundIterator is selected.
2022-04-20 18:40.28 [info     ] Directory is created at d3rlpy_logs/FQE_20220420184028
2022-04-20 18:40.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:40.28 [debug    ] Building models...
2022-04-20 18:40.28 [debug    ] Models have been built.
2022-04-20 18:40.28 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420184028/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batc

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:40.31 [info     ] FQE_20220420184028: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016302147576975268, 'time_algorithm_update': 0.004975664754246556, 'loss': 0.03026827357622773, 'time_step': 0.005212621633396592, 'init_value': -1.3560059070587158, 'ave_value': -1.3244881375415904, 'soft_opc': nan} step=344




2022-04-20 18:40.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.32 [info     ] FQE_20220420184028: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016594695490460064, 'time_algorithm_update': 0.004564532013826592, 'loss': 0.02544353733131619, 'time_step': 0.004804202983545703, 'init_value': -2.181534767150879, 'ave_value': -2.1458133231143695, 'soft_opc': nan} step=688




2022-04-20 18:40.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.34 [info     ] FQE_20220420184028: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016727697017580965, 'time_algorithm_update': 0.004987231520719306, 'loss': 0.029609869345264554, 'time_step': 0.005228393299635066, 'init_value': -3.3091044425964355, 'ave_value': -3.272192329034075, 'soft_opc': nan} step=1032




2022-04-20 18:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.36 [info     ] FQE_20220420184028: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016354059064111045, 'time_algorithm_update': 0.005027561686759771, 'loss': 0.03398092704294466, 'time_step': 0.005263458157694617, 'init_value': -3.957350730895996, 'ave_value': -3.9351661467068904, 'soft_opc': nan} step=1376




2022-04-20 18:40.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.38 [info     ] FQE_20220420184028: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016599824262219807, 'time_algorithm_update': 0.005000542762667634, 'loss': 0.04437423928987321, 'time_step': 0.005241383646809777, 'init_value': -4.79277229309082, 'ave_value': -4.784108322637307, 'soft_opc': nan} step=1720




2022-04-20 18:40.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.40 [info     ] FQE_20220420184028: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016746410103731378, 'time_algorithm_update': 0.004567470661429472, 'loss': 0.05340529706729793, 'time_step': 0.004811828219613364, 'init_value': -5.635568141937256, 'ave_value': -5.640014931697164, 'soft_opc': nan} step=2064




2022-04-20 18:40.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.42 [info     ] FQE_20220420184028: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016494476517965628, 'time_algorithm_update': 0.0049913976081582, 'loss': 0.06728295029985697, 'time_step': 0.005231347888015037, 'init_value': -6.534799575805664, 'ave_value': -6.572591057008652, 'soft_opc': nan} step=2408




2022-04-20 18:40.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.44 [info     ] FQE_20220420184028: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016654022904329522, 'time_algorithm_update': 0.004977304575055144, 'loss': 0.08464745624900558, 'time_step': 0.005221261534579965, 'init_value': -7.2884416580200195, 'ave_value': -7.343584915852543, 'soft_opc': nan} step=2752




2022-04-20 18:40.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.46 [info     ] FQE_20220420184028: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017091909120249194, 'time_algorithm_update': 0.00500631055166555, 'loss': 0.10045365919900495, 'time_step': 0.005254262408544851, 'init_value': -7.761735439300537, 'ave_value': -7.8505900894686045, 'soft_opc': nan} step=3096




2022-04-20 18:40.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.48 [info     ] FQE_20220420184028: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016757430032242175, 'time_algorithm_update': 0.005009124445360761, 'loss': 0.11964826379512805, 'time_step': 0.005249562651612038, 'init_value': -8.699230194091797, 'ave_value': -8.872026742338665, 'soft_opc': nan} step=3440




2022-04-20 18:40.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.50 [info     ] FQE_20220420184028: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016455733498861624, 'time_algorithm_update': 0.004540226487226264, 'loss': 0.13740872355113024, 'time_step': 0.004778785067935323, 'init_value': -9.29030990600586, 'ave_value': -9.614554896034502, 'soft_opc': nan} step=3784




2022-04-20 18:40.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.52 [info     ] FQE_20220420184028: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016662894293319349, 'time_algorithm_update': 0.005040565895479779, 'loss': 0.15820516295579456, 'time_step': 0.0052798501280851146, 'init_value': -9.88813304901123, 'ave_value': -10.334461755198971, 'soft_opc': nan} step=4128




2022-04-20 18:40.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.54 [info     ] FQE_20220420184028: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016754380492276923, 'time_algorithm_update': 0.005014609458834626, 'loss': 0.1714580493361884, 'time_step': 0.005257598189420478, 'init_value': -10.128669738769531, 'ave_value': -10.797995826554576, 'soft_opc': nan} step=4472




2022-04-20 18:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.56 [info     ] FQE_20220420184028: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016699973926987758, 'time_algorithm_update': 0.005011636157368504, 'loss': 0.1895768599920408, 'time_step': 0.0052545514217642855, 'init_value': -10.602008819580078, 'ave_value': -11.51788010786514, 'soft_opc': nan} step=4816




2022-04-20 18:40.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.57 [info     ] FQE_20220420184028: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016256057938864065, 'time_algorithm_update': 0.004677254100178563, 'loss': 0.20609926402590475, 'time_step': 0.004914916531984196, 'init_value': -10.970743179321289, 'ave_value': -12.106832706062795, 'soft_opc': nan} step=5160




2022-04-20 18:40.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:40.59 [info     ] FQE_20220420184028: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001615729442862577, 'time_algorithm_update': 0.0032789152722026028, 'loss': 0.22130993690320053, 'time_step': 0.0035125744897265766, 'init_value': -11.283939361572266, 'ave_value': -12.689548545594638, 'soft_opc': nan} step=5504




2022-04-20 18:40.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.00 [info     ] FQE_20220420184028: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015960807024046432, 'time_algorithm_update': 0.003445544908213061, 'loss': 0.23368504350506808, 'time_step': 0.0036779274774152177, 'init_value': -11.442333221435547, 'ave_value': -13.065192323460806, 'soft_opc': nan} step=5848




2022-04-20 18:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.01 [info     ] FQE_20220420184028: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016350940216419308, 'time_algorithm_update': 0.0034255156683367354, 'loss': 0.24281068185214386, 'time_step': 0.0036606920319934223, 'init_value': -11.71136474609375, 'ave_value': -13.576016675779043, 'soft_opc': nan} step=6192




2022-04-20 18:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.03 [info     ] FQE_20220420184028: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015766468158988066, 'time_algorithm_update': 0.003420890763748524, 'loss': 0.2591788981672983, 'time_step': 0.0036499084428299306, 'init_value': -12.13492202758789, 'ave_value': -14.253209945012454, 'soft_opc': nan} step=6536




2022-04-20 18:41.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.04 [info     ] FQE_20220420184028: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001595158909642419, 'time_algorithm_update': 0.0034008178600045138, 'loss': 0.2696030126278137, 'time_step': 0.003631318031355392, 'init_value': -12.274450302124023, 'ave_value': -14.657561176892806, 'soft_opc': nan} step=6880




2022-04-20 18:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.06 [info     ] FQE_20220420184028: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015958589176798976, 'time_algorithm_update': 0.0034710404484771017, 'loss': 0.28395042701645995, 'time_step': 0.0037012529927630757, 'init_value': -12.465523719787598, 'ave_value': -15.057178908227398, 'soft_opc': nan} step=7224




2022-04-20 18:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.07 [info     ] FQE_20220420184028: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016120284102683844, 'time_algorithm_update': 0.0034830292990041334, 'loss': 0.28962453123355325, 'time_step': 0.0037175243677094925, 'init_value': -12.906401634216309, 'ave_value': -15.748958158328593, 'soft_opc': nan} step=7568




2022-04-20 18:41.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.08 [info     ] FQE_20220420184028: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016007173893063567, 'time_algorithm_update': 0.003475907930108004, 'loss': 0.3010561633963398, 'time_step': 0.003707980000695517, 'init_value': -13.194236755371094, 'ave_value': -16.155314237676425, 'soft_opc': nan} step=7912




2022-04-20 18:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.10 [info     ] FQE_20220420184028: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001614870027054188, 'time_algorithm_update': 0.0034620623255884925, 'loss': 0.30998814974499994, 'time_step': 0.0036976753279220225, 'init_value': -13.613716125488281, 'ave_value': -16.771976795098702, 'soft_opc': nan} step=8256




2022-04-20 18:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.11 [info     ] FQE_20220420184028: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016232978465945222, 'time_algorithm_update': 0.003524640271830004, 'loss': 0.31313617842618463, 'time_step': 0.0037610454614772355, 'init_value': -14.113716125488281, 'ave_value': -17.42456484350485, 'soft_opc': nan} step=8600




2022-04-20 18:41.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.13 [info     ] FQE_20220420184028: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016178779823835507, 'time_algorithm_update': 0.0034105652986570847, 'loss': 0.32434978337178744, 'time_step': 0.0036448143249334292, 'init_value': -14.582533836364746, 'ave_value': -18.105935498483078, 'soft_opc': nan} step=8944




2022-04-20 18:41.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.14 [info     ] FQE_20220420184028: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015797310097273007, 'time_algorithm_update': 0.003399923790332883, 'loss': 0.33194943147083356, 'time_step': 0.0036286690900492113, 'init_value': -14.635360717773438, 'ave_value': -18.410977812262402, 'soft_opc': nan} step=9288




2022-04-20 18:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.15 [info     ] FQE_20220420184028: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016121462334034054, 'time_algorithm_update': 0.003527010596075723, 'loss': 0.3326029953936678, 'time_step': 0.0037633229133694672, 'init_value': -14.62710189819336, 'ave_value': -18.780676267789005, 'soft_opc': nan} step=9632




2022-04-20 18:41.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.17 [info     ] FQE_20220420184028: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016245384548985682, 'time_algorithm_update': 0.0034599602222442627, 'loss': 0.3247387027584536, 'time_step': 0.0036965906620025635, 'init_value': -14.35982608795166, 'ave_value': -18.94504543974249, 'soft_opc': nan} step=9976




2022-04-20 18:41.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.18 [info     ] FQE_20220420184028: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016113561253214991, 'time_algorithm_update': 0.003436479457589083, 'loss': 0.32871612065016875, 'time_step': 0.003669824018034824, 'init_value': -14.341480255126953, 'ave_value': -19.295351619696294, 'soft_opc': nan} step=10320




2022-04-20 18:41.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.19 [info     ] FQE_20220420184028: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015959005023157872, 'time_algorithm_update': 0.0034890673881353335, 'loss': 0.3319776975441464, 'time_step': 0.0037235673083815465, 'init_value': -14.753233909606934, 'ave_value': -19.969549325143348, 'soft_opc': nan} step=10664




2022-04-20 18:41.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.21 [info     ] FQE_20220420184028: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016139828881552053, 'time_algorithm_update': 0.0034880430199379143, 'loss': 0.3378040056762307, 'time_step': 0.003724645736605622, 'init_value': -14.854281425476074, 'ave_value': -20.36636075017437, 'soft_opc': nan} step=11008




2022-04-20 18:41.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.22 [info     ] FQE_20220420184028: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015864192053329114, 'time_algorithm_update': 0.003454499466474666, 'loss': 0.33902011623302863, 'time_step': 0.0036846288414888605, 'init_value': -14.814004898071289, 'ave_value': -20.65547017841432, 'soft_opc': nan} step=11352




2022-04-20 18:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.24 [info     ] FQE_20220420184028: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015866756439208984, 'time_algorithm_update': 0.003434867359871088, 'loss': 0.34262971375354156, 'time_step': 0.0036624988844228346, 'init_value': -14.743056297302246, 'ave_value': -20.893029324213664, 'soft_opc': nan} step=11696




2022-04-20 18:41.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.25 [info     ] FQE_20220420184028: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016104897787404615, 'time_algorithm_update': 0.0034386751263640646, 'loss': 0.34745016865172357, 'time_step': 0.003672158302262772, 'init_value': -15.168813705444336, 'ave_value': -21.62162186670545, 'soft_opc': nan} step=12040




2022-04-20 18:41.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.26 [info     ] FQE_20220420184028: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016127977260323457, 'time_algorithm_update': 0.0034841146580008573, 'loss': 0.3430243311558179, 'time_step': 0.0037220279837763587, 'init_value': -14.995285034179688, 'ave_value': -21.688660741138886, 'soft_opc': nan} step=12384




2022-04-20 18:41.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.28 [info     ] FQE_20220420184028: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016192572061405627, 'time_algorithm_update': 0.00346470502919929, 'loss': 0.3498957721234927, 'time_step': 0.0037028034066045007, 'init_value': -15.134162902832031, 'ave_value': -22.120034757587085, 'soft_opc': nan} step=12728




2022-04-20 18:41.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.29 [info     ] FQE_20220420184028: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016612022422080817, 'time_algorithm_update': 0.0034651312717171603, 'loss': 0.3427044639667106, 'time_step': 0.0037069895932840746, 'init_value': -15.008037567138672, 'ave_value': -22.133059105002697, 'soft_opc': nan} step=13072




2022-04-20 18:41.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.31 [info     ] FQE_20220420184028: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001617933428564737, 'time_algorithm_update': 0.0034766682358675226, 'loss': 0.34152836599495523, 'time_step': 0.003713200951731482, 'init_value': -15.184529304504395, 'ave_value': -22.60745285576679, 'soft_opc': nan} step=13416




2022-04-20 18:41.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.32 [info     ] FQE_20220420184028: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001593793547430704, 'time_algorithm_update': 0.002989784922710685, 'loss': 0.3453419702614896, 'time_step': 0.0032208229220190713, 'init_value': -15.304802894592285, 'ave_value': -22.759244379558943, 'soft_opc': nan} step=13760




2022-04-20 18:41.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.33 [info     ] FQE_20220420184028: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016139828881552053, 'time_algorithm_update': 0.003464143636614777, 'loss': 0.3549230402866162, 'time_step': 0.003698845242345056, 'init_value': -15.974581718444824, 'ave_value': -23.5236962367554, 'soft_opc': nan} step=14104




2022-04-20 18:41.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.35 [info     ] FQE_20220420184028: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016071838001872218, 'time_algorithm_update': 0.0035072315570920014, 'loss': 0.3670274869289769, 'time_step': 0.003741728705029155, 'init_value': -16.262714385986328, 'ave_value': -23.92632656336913, 'soft_opc': nan} step=14448




2022-04-20 18:41.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.36 [info     ] FQE_20220420184028: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001611931379451308, 'time_algorithm_update': 0.0033717848533807797, 'loss': 0.38292205860556733, 'time_step': 0.0036049970360689386, 'init_value': -16.64657211303711, 'ave_value': -24.427378971468205, 'soft_opc': nan} step=14792




2022-04-20 18:41.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.37 [info     ] FQE_20220420184028: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016104828479678133, 'time_algorithm_update': 0.003597699625547542, 'loss': 0.3922160423068373, 'time_step': 0.003832558559816937, 'init_value': -17.009239196777344, 'ave_value': -24.79159927730373, 'soft_opc': nan} step=15136




2022-04-20 18:41.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.39 [info     ] FQE_20220420184028: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015964341718097065, 'time_algorithm_update': 0.0034757755523504214, 'loss': 0.403794624701922, 'time_step': 0.003707189199536346, 'init_value': -16.55323600769043, 'ave_value': -24.425562459739712, 'soft_opc': nan} step=15480




2022-04-20 18:41.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.40 [info     ] FQE_20220420184028: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016075164772743402, 'time_algorithm_update': 0.0034704963828242103, 'loss': 0.4009626075018977, 'time_step': 0.0037021976570750393, 'init_value': -16.584705352783203, 'ave_value': -24.568502121608336, 'soft_opc': nan} step=15824




2022-04-20 18:41.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.42 [info     ] FQE_20220420184028: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016083273776741915, 'time_algorithm_update': 0.003513283507768498, 'loss': 0.40630457287134475, 'time_step': 0.0037499562252399535, 'init_value': -17.0889892578125, 'ave_value': -24.94815860389992, 'soft_opc': nan} step=16168




2022-04-20 18:41.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.43 [info     ] FQE_20220420184028: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001598492611286252, 'time_algorithm_update': 0.0035209759723308473, 'loss': 0.4198159598203939, 'time_step': 0.0037568897701973137, 'init_value': -17.407764434814453, 'ave_value': -25.30303425222954, 'soft_opc': nan} step=16512




2022-04-20 18:41.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.45 [info     ] FQE_20220420184028: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015842082888581032, 'time_algorithm_update': 0.0035568752954172533, 'loss': 0.43018033868243355, 'time_step': 0.003789079743762349, 'init_value': -17.569854736328125, 'ave_value': -25.47235141410786, 'soft_opc': nan} step=16856




2022-04-20 18:41.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:41.46 [info     ] FQE_20220420184028: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016364940377168878, 'time_algorithm_update': 0.003549437883288361, 'loss': 0.4361929988954216, 'time_step': 0.0037888364736423934, 'init_value': -17.528228759765625, 'ave_value': -25.43079855882202, 'soft_opc': nan} step=17200




2022-04-20 18:41.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184028/model_17200.pt
search iteration:  38
using hyper params:  [0.007660988105595241, 0.005623202042325921, 9.960311503796304e-05, 5]
2022-04-20 18:41.46 [debug    ] RoundIterator is selected.
2022-04-20 18:41.46 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420184146
2022-04-20 18:41.46 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:41.46 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:41.46 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:41.46 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00766098810559

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:41.49 [info     ] TD3PlusBC_20220420184146: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00037005282284920674, 'time_algorithm_update': 0.007008108479237696, 'critic_loss': 6.163674447097276, 'actor_loss': 2.636395589650026, 'time_step': 0.007459660022579438, 'td_error': 0.913518537837035, 'init_value': -7.98123025894165, 'ave_value': -4.93232860133656} step=342
2022-04-20 18:41.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:41.52 [info     ] TD3PlusBC_20220420184146: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036752990811888934, 'time_algorithm_update': 0.00686761440589414, 'critic_loss': 3.393432648035518, 'actor_loss': 2.536270282421893, 'time_step': 0.00731284018845586, 'td_error': 1.042809942647299, 'init_value': -11.41648006439209, 'ave_value': -7.0168464678090885} step=684
2022-04-20 18:41.52 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:41.55 [info     ] TD3PlusBC_20220420184146: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00036868017319350216, 'time_algorithm_update': 0.006753598737437823, 'critic_loss': 5.152164054195783, 'actor_loss': 2.525247628228706, 'time_step': 0.0071998904323020176, 'td_error': 1.2620046006272738, 'init_value': -15.046152114868164, 'ave_value': -9.367737393075105} step=1026
2022-04-20 18:41.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:41.58 [info     ] TD3PlusBC_20220420184146: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00037094375543427047, 'time_algorithm_update': 0.00708609784555714, 'critic_loss': 7.564942752408703, 'actor_loss': 2.521964662953427, 'time_step': 0.007536782855876008, 'td_error': 1.5183556554747293, 'init_value': -18.651355743408203, 'ave_value': -11.570220957499602} step=1368
2022-04-20 18:41.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.01 [info     ] TD3PlusBC_20220420184146: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003665811137149208, 'time_algorithm_update': 0.006976239165367439, 'critic_loss': 10.178716937003777, 'actor_loss': 2.521722552372001, 'time_step': 0.007422937984355012, 'td_error': 1.8174991301399432, 'init_value': -22.148387908935547, 'ave_value': -13.755652181681347} step=1710
2022-04-20 18:42.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.04 [info     ] TD3PlusBC_20220420184146: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00037373994526110197, 'time_algorithm_update': 0.0070269191474245304, 'critic_loss': 13.031730519400703, 'actor_loss': 2.520251516710248, 'time_step': 0.007478792067856816, 'td_error': 2.2287285292909753, 'init_value': -25.962133407592773, 'ave_value': -16.23238362682287} step=2052
2022-04-20 18:42.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.07 [info     ] TD3PlusBC_20220420184146: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003751697596053631, 'time_algorithm_update': 0.006930910355863515, 'critic_loss': 16.1252173150492, 'actor_loss': 2.5207199673903617, 'time_step': 0.007381897920753524, 'td_error': 2.5085662605437467, 'init_value': -29.12312889099121, 'ave_value': -18.37469424821256} step=2394
2022-04-20 18:42.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.10 [info     ] TD3PlusBC_20220420184146: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037093329847904676, 'time_algorithm_update': 0.007044128507201435, 'critic_loss': 19.528428299385205, 'actor_loss': 2.52008995535778, 'time_step': 0.007488482179697494, 'td_error': 2.8625947182428506, 'init_value': -32.827980041503906, 'ave_value': -20.590577922015793} step=2736
2022-04-20 18:42.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.13 [info     ] TD3PlusBC_20220420184146: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00036647375564129034, 'time_algorithm_update': 0.006942536398681283, 'critic_loss': 23.444819645574917, 'actor_loss': 2.5188262239534254, 'time_step': 0.007383863828335589, 'td_error': 3.2664299926673475, 'init_value': -35.79296112060547, 'ave_value': -22.566876235010675} step=3078
2022-04-20 18:42.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.16 [info     ] TD3PlusBC_20220420184146: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.000374771001046164, 'time_algorithm_update': 0.006821706978201169, 'critic_loss': 26.884429412975646, 'actor_loss': 2.519749980223806, 'time_step': 0.0072680167984544184, 'td_error': 3.6314685018107307, 'init_value': -39.107391357421875, 'ave_value': -24.606145591894418} step=3420
2022-04-20 18:42.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.18 [info     ] TD3PlusBC_20220420184146: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00036835809897261054, 'time_algorithm_update': 0.006948694151047378, 'critic_loss': 31.137771860200758, 'actor_loss': 2.5191023991121884, 'time_step': 0.00739018610346387, 'td_error': 3.9341878567393214, 'init_value': -41.44036865234375, 'ave_value': -26.25297644165269} step=3762
2022-04-20 18:42.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.21 [info     ] TD3PlusBC_20220420184146: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00037982937885306734, 'time_algorithm_update': 0.007096706078066463, 'critic_loss': 34.785173750760265, 'actor_loss': 2.520414040102596, 'time_step': 0.007555623500667817, 'td_error': 4.411160653573087, 'init_value': -45.59845733642578, 'ave_value': -28.501679650726352} step=4104
2022-04-20 18:42.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.24 [info     ] TD3PlusBC_20220420184146: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003668983080233747, 'time_algorithm_update': 0.006957853746693036, 'critic_loss': 39.553026888105606, 'actor_loss': 2.520047324442724, 'time_step': 0.007402730964080632, 'td_error': 4.699961436883269, 'init_value': -47.34110641479492, 'ave_value': -30.018888733943605} step=4446
2022-04-20 18:42.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.27 [info     ] TD3PlusBC_20220420184146: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00036904965227807474, 'time_algorithm_update': 0.00691608030196519, 'critic_loss': 43.40439365900051, 'actor_loss': 2.519737108409056, 'time_step': 0.007361813595420436, 'td_error': 5.226156263781971, 'init_value': -50.69612121582031, 'ave_value': -31.799462923274486} step=4788
2022-04-20 18:42.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.30 [info     ] TD3PlusBC_20220420184146: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00036733680301242403, 'time_algorithm_update': 0.006981416752463893, 'critic_loss': 48.121669501589054, 'actor_loss': 2.5195039810492976, 'time_step': 0.007423067650599786, 'td_error': 5.483999650048085, 'init_value': -52.38044357299805, 'ave_value': -33.24832346876278} step=5130
2022-04-20 18:42.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.33 [info     ] TD3PlusBC_20220420184146: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003775755564371745, 'time_algorithm_update': 0.007006878741303383, 'critic_loss': 52.62496948799892, 'actor_loss': 2.518723950748555, 'time_step': 0.007462163417660005, 'td_error': 5.881649848078504, 'init_value': -54.768829345703125, 'ave_value': -34.84388325838616} step=5472
2022-04-20 18:42.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.36 [info     ] TD3PlusBC_20220420184146: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003764462052730092, 'time_algorithm_update': 0.007073594115630925, 'critic_loss': 56.654705114531936, 'actor_loss': 2.5191497509939627, 'time_step': 0.0075295180605168925, 'td_error': 6.136468064174391, 'init_value': -55.28489303588867, 'ave_value': -36.11392604909913} step=5814
2022-04-20 18:42.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.39 [info     ] TD3PlusBC_20220420184146: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003703623487238298, 'time_algorithm_update': 0.007009773226509317, 'critic_loss': 60.831184649328044, 'actor_loss': 2.5182501572614524, 'time_step': 0.0074547090028461655, 'td_error': 6.636002365905682, 'init_value': -58.97502517700195, 'ave_value': -37.89629757685419} step=6156
2022-04-20 18:42.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.42 [info     ] TD3PlusBC_20220420184146: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003715321334481936, 'time_algorithm_update': 0.006955079167907001, 'critic_loss': 64.78042439131708, 'actor_loss': 2.519670869871887, 'time_step': 0.007405887570297509, 'td_error': 6.9283619396619445, 'init_value': -59.75090408325195, 'ave_value': -38.86818947254403} step=6498
2022-04-20 18:42.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.45 [info     ] TD3PlusBC_20220420184146: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003788317853247213, 'time_algorithm_update': 0.007030328811957822, 'critic_loss': 69.22070653815018, 'actor_loss': 2.5183253776260286, 'time_step': 0.00748645213612339, 'td_error': 7.621822755088821, 'init_value': -63.196807861328125, 'ave_value': -40.618086037962016} step=6840
2022-04-20 18:42.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.48 [info     ] TD3PlusBC_20220420184146: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00036551589854279454, 'time_algorithm_update': 0.0068829108399954455, 'critic_loss': 73.55634094818294, 'actor_loss': 2.5198793160287956, 'time_step': 0.0073243546904179085, 'td_error': 7.552027539999462, 'init_value': -63.96147537231445, 'ave_value': -41.74188118889559} step=7182
2022-04-20 18:42.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.51 [info     ] TD3PlusBC_20220420184146: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00036966800689697266, 'time_algorithm_update': 0.006909146643521493, 'critic_loss': 77.30695621869718, 'actor_loss': 2.5202852104142397, 'time_step': 0.007353813327543917, 'td_error': 8.263357922924477, 'init_value': -66.17642974853516, 'ave_value': -42.76806070152603} step=7524
2022-04-20 18:42.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.54 [info     ] TD3PlusBC_20220420184146: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00036690737071790194, 'time_algorithm_update': 0.0068197424648798, 'critic_loss': 81.32558011729814, 'actor_loss': 2.5204854973575523, 'time_step': 0.007260549138163963, 'td_error': 8.420479201636075, 'init_value': -66.6999282836914, 'ave_value': -43.97250959252519} step=7866
2022-04-20 18:42.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:42.57 [info     ] TD3PlusBC_20220420184146: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00037012671866612127, 'time_algorithm_update': 0.008464642435486554, 'critic_loss': 85.6107139029698, 'actor_loss': 2.5207821514174253, 'time_step': 0.008913231871978582, 'td_error': 8.998428251350479, 'init_value': -69.03470611572266, 'ave_value': -45.23473966001122} step=8208
2022-04-20 18:42.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.01 [info     ] TD3PlusBC_20220420184146: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00038111488721524067, 'time_algorithm_update': 0.008958773306238722, 'critic_loss': 88.93978416710569, 'actor_loss': 2.521201274548358, 'time_step': 0.009418300717894793, 'td_error': 8.699429126821352, 'init_value': -68.65562438964844, 'ave_value': -45.93767891080152} step=8550
2022-04-20 18:43.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.05 [info     ] TD3PlusBC_20220420184146: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003811016417386239, 'time_algorithm_update': 0.008769765931960435, 'critic_loss': 92.89189801578634, 'actor_loss': 2.5218015902223643, 'time_step': 0.009233486582661233, 'td_error': 9.76270100078295, 'init_value': -72.43791961669922, 'ave_value': -47.26126142307168} step=8892
2022-04-20 18:43.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.09 [info     ] TD3PlusBC_20220420184146: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003842659163893315, 'time_algorithm_update': 0.009105790428250854, 'critic_loss': 96.33052432188514, 'actor_loss': 2.5208747372989766, 'time_step': 0.009568504422728778, 'td_error': 9.56229062841395, 'init_value': -72.33839416503906, 'ave_value': -48.064939589288905} step=9234
2022-04-20 18:43.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.12 [info     ] TD3PlusBC_20220420184146: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00037393862741035325, 'time_algorithm_update': 0.008921975977936683, 'critic_loss': 99.88622916372199, 'actor_loss': 2.5218011580015482, 'time_step': 0.009375404196175916, 'td_error': 9.80299319243943, 'init_value': -72.83548736572266, 'ave_value': -48.670099711489975} step=9576
2022-04-20 18:43.12 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.16 [info     ] TD3PlusBC_20220420184146: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00037362003884120295, 'time_algorithm_update': 0.008394681222257559, 'critic_loss': 102.89855716103001, 'actor_loss': 2.5228102179298624, 'time_step': 0.008829565773233336, 'td_error': 10.676822722800377, 'init_value': -75.43611145019531, 'ave_value': -49.97133735034732} step=9918
2022-04-20 18:43.16 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.20 [info     ] TD3PlusBC_20220420184146: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00037659329977648997, 'time_algorithm_update': 0.008831908131203456, 'critic_loss': 106.44390409592299, 'actor_loss': 2.522770488471316, 'time_step': 0.00927422409169158, 'td_error': 10.47377988917519, 'init_value': -74.95024108886719, 'ave_value': -50.54694925344104} step=10260
2022-04-20 18:43.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.23 [info     ] TD3PlusBC_20220420184146: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00037171547872978346, 'time_algorithm_update': 0.008517319695991381, 'critic_loss': 109.55537354318719, 'actor_loss': 2.5222742362329136, 'time_step': 0.008955311356929311, 'td_error': 10.88402465190602, 'init_value': -76.34249114990234, 'ave_value': -51.46605059393287} step=10602
2022-04-20 18:43.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.27 [info     ] TD3PlusBC_20220420184146: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003833714981525265, 'time_algorithm_update': 0.008937545687134504, 'critic_loss': 112.21795294019911, 'actor_loss': 2.5229312090845832, 'time_step': 0.009388383368999637, 'td_error': 11.415011200979187, 'init_value': -79.09381103515625, 'ave_value': -52.52799159977216} step=10944
2022-04-20 18:43.27 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.31 [info     ] TD3PlusBC_20220420184146: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00037650685561330696, 'time_algorithm_update': 0.008976218993203682, 'critic_loss': 115.6057666979338, 'actor_loss': 2.523138804742467, 'time_step': 0.00941589213254159, 'td_error': 11.602777831184513, 'init_value': -78.723876953125, 'ave_value': -53.17330616485311} step=11286
2022-04-20 18:43.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.34 [info     ] TD3PlusBC_20220420184146: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00036838528705619234, 'time_algorithm_update': 0.008466129414519371, 'critic_loss': 118.29733131364075, 'actor_loss': 2.5237561108773217, 'time_step': 0.00890480217180754, 'td_error': 10.961209106768056, 'init_value': -75.16696166992188, 'ave_value': -53.08780651079583} step=11628
2022-04-20 18:43.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.38 [info     ] TD3PlusBC_20220420184146: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003763018992909214, 'time_algorithm_update': 0.008984089594835426, 'critic_loss': 121.01990214565345, 'actor_loss': 2.523423576912685, 'time_step': 0.009426526856004145, 'td_error': 10.853366031766667, 'init_value': -75.02262878417969, 'ave_value': -53.61949369313976} step=11970
2022-04-20 18:43.38 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.42 [info     ] TD3PlusBC_20220420184146: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00037752745444314523, 'time_algorithm_update': 0.008731044523897226, 'critic_loss': 123.56035165619431, 'actor_loss': 2.525022837153652, 'time_step': 0.009176853804560433, 'td_error': 11.955425207448815, 'init_value': -79.80054473876953, 'ave_value': -55.12026920854592} step=12312
2022-04-20 18:43.42 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.45 [info     ] TD3PlusBC_20220420184146: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.000379521944369489, 'time_algorithm_update': 0.008865246298717476, 'critic_loss': 126.18294400220726, 'actor_loss': 2.525000144166556, 'time_step': 0.009313961915802537, 'td_error': 12.53586564539293, 'init_value': -79.34627532958984, 'ave_value': -55.59325620707997} step=12654
2022-04-20 18:43.45 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.49 [info     ] TD3PlusBC_20220420184146: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003821654626500537, 'time_algorithm_update': 0.00907664828830295, 'critic_loss': 128.5317135414882, 'actor_loss': 2.5262055062411126, 'time_step': 0.009523713100723356, 'td_error': 12.107002945220925, 'init_value': -78.90577697753906, 'ave_value': -56.145538433028534} step=12996
2022-04-20 18:43.49 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.53 [info     ] TD3PlusBC_20220420184146: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00037499338562725583, 'time_algorithm_update': 0.00862962669796414, 'critic_loss': 131.08763517413223, 'actor_loss': 2.5270989694093404, 'time_step': 0.00906660473137571, 'td_error': 12.694369672049216, 'init_value': -80.29388427734375, 'ave_value': -56.346615037964405} step=13338
2022-04-20 18:43.53 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.57 [info     ] TD3PlusBC_20220420184146: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00037956934923317, 'time_algorithm_update': 0.008954584249976085, 'critic_loss': 132.8915935984829, 'actor_loss': 2.526304783179746, 'time_step': 0.009403380734181543, 'td_error': 12.62656700272604, 'init_value': -80.41357421875, 'ave_value': -57.23605949575921} step=13680
2022-04-20 18:43.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.00 [info     ] TD3PlusBC_20220420184146: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037958259470978673, 'time_algorithm_update': 0.008926729709781402, 'critic_loss': 135.06999425163045, 'actor_loss': 2.5272058980506764, 'time_step': 0.009371700342635663, 'td_error': 13.39699851200777, 'init_value': -82.08427429199219, 'ave_value': -58.11440511503752} step=14022
2022-04-20 18:44.00 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.04 [info     ] TD3PlusBC_20220420184146: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037595124272575157, 'time_algorithm_update': 0.008960057420340197, 'critic_loss': 136.73503277873436, 'actor_loss': 2.526946148677179, 'time_step': 0.00940168113039251, 'td_error': 14.029889119802393, 'init_value': -81.94697570800781, 'ave_value': -58.54234229111004} step=14364
2022-04-20 18:44.04 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.08 [info     ] TD3PlusBC_20220420184146: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00037444822969492415, 'time_algorithm_update': 0.008963299076459562, 'critic_loss': 138.37407737865783, 'actor_loss': 2.5276021580947075, 'time_step': 0.00940291993102135, 'td_error': 13.483178449713765, 'init_value': -80.44395446777344, 'ave_value': -58.43034758635595} step=14706
2022-04-20 18:44.08 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.11 [info     ] TD3PlusBC_20220420184146: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003778216434501068, 'time_algorithm_update': 0.008645577040332102, 'critic_loss': 140.04028886940048, 'actor_loss': 2.527782490378932, 'time_step': 0.009091690269827146, 'td_error': 14.221596018144956, 'init_value': -79.99993896484375, 'ave_value': -58.94976148218302} step=15048
2022-04-20 18:44.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.15 [info     ] TD3PlusBC_20220420184146: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00038476018180624085, 'time_algorithm_update': 0.0089406939277872, 'critic_loss': 141.7105004160028, 'actor_loss': 2.527845559761538, 'time_step': 0.009394259480705039, 'td_error': 13.504452378888798, 'init_value': -80.73353576660156, 'ave_value': -59.26227283419622} step=15390
2022-04-20 18:44.15 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.19 [info     ] TD3PlusBC_20220420184146: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037566890493471024, 'time_algorithm_update': 0.009168196839895861, 'critic_loss': 143.12305033276652, 'actor_loss': 2.527342266506619, 'time_step': 0.009607142872280545, 'td_error': 14.25410341850474, 'init_value': -80.06974792480469, 'ave_value': -59.544157702621945} step=15732
2022-04-20 18:44.19 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.23 [info     ] TD3PlusBC_20220420184146: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00038688991502014517, 'time_algorithm_update': 0.009143091084664328, 'critic_loss': 144.6427943581029, 'actor_loss': 2.5293869065959553, 'time_step': 0.009600166688885605, 'td_error': 14.24181293350215, 'init_value': -81.30999755859375, 'ave_value': -60.1258746939092} step=16074
2022-04-20 18:44.23 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.26 [info     ] TD3PlusBC_20220420184146: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003754946223476477, 'time_algorithm_update': 0.009228796289678206, 'critic_loss': 145.6299639250103, 'actor_loss': 2.529408563647354, 'time_step': 0.009669649670695701, 'td_error': 14.776659222551556, 'init_value': -82.4774398803711, 'ave_value': -61.01298461542532} step=16416
2022-04-20 18:44.26 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.30 [info     ] TD3PlusBC_20220420184146: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00038214873152169566, 'time_algorithm_update': 0.008969629717152022, 'critic_loss': 147.29330801266676, 'actor_loss': 2.5299800306732894, 'time_step': 0.009416037832784373, 'td_error': 13.876125911234901, 'init_value': -79.57330322265625, 'ave_value': -60.812373485353916} step=16758
2022-04-20 18:44.30 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.34 [info     ] TD3PlusBC_20220420184146: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003779080876132898, 'time_algorithm_update': 0.00917773079453853, 'critic_loss': 148.54178481074104, 'actor_loss': 2.5296146409553395, 'time_step': 0.009619153033920198, 'td_error': 15.072138077368349, 'init_value': -80.4686508178711, 'ave_value': -61.02370143646257} step=17100
2022-04-20 18:44.34 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184146/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:44.35 [info     ] FQE_20220420184434: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016911776669054147, 'time_algorithm_update': 0.005005012075585055, 'loss': 0.007188007082644266, 'time_step': 0.005252272249704384, 'init_value': -0.2374362200498581, 'ave_value': -0.17768940129044658, 'soft_opc': nan} step=166




2022-04-20 18:44.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.36 [info     ] FQE_20220420184434: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00017182223768119352, 'time_algorithm_update': 0.005001553569931582, 'loss': 0.005547453559576029, 'time_step': 0.0052472884396472606, 'init_value': -0.3381837010383606, 'ave_value': -0.2259887007616779, 'soft_opc': nan} step=332




2022-04-20 18:44.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.37 [info     ] FQE_20220420184434: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017051237175263553, 'time_algorithm_update': 0.004992446267461202, 'loss': 0.004915089586312063, 'time_step': 0.005239817033331078, 'init_value': -0.4218824803829193, 'ave_value': -0.2862230955809774, 'soft_opc': nan} step=498




2022-04-20 18:44.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.38 [info     ] FQE_20220420184434: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017320822520428393, 'time_algorithm_update': 0.005086249615772661, 'loss': 0.004902062951643245, 'time_step': 0.005334421812769878, 'init_value': -0.4940638542175293, 'ave_value': -0.3334952467666553, 'soft_opc': nan} step=664




2022-04-20 18:44.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.39 [info     ] FQE_20220420184434: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.000174236584858722, 'time_algorithm_update': 0.004548411771475551, 'loss': 0.004384770903859781, 'time_step': 0.004792635699352586, 'init_value': -0.581516683101654, 'ave_value': -0.39540067162629017, 'soft_opc': nan} step=830




2022-04-20 18:44.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.40 [info     ] FQE_20220420184434: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016742872904582196, 'time_algorithm_update': 0.0050872808479401005, 'loss': 0.004083896111921373, 'time_step': 0.005328041961394161, 'init_value': -0.572941780090332, 'ave_value': -0.38011316422096175, 'soft_opc': nan} step=996




2022-04-20 18:44.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.41 [info     ] FQE_20220420184434: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00017477948981595327, 'time_algorithm_update': 0.0050900240978562685, 'loss': 0.003934287408602436, 'time_step': 0.005337051598422499, 'init_value': -0.6394329071044922, 'ave_value': -0.4383271297523836, 'soft_opc': nan} step=1162




2022-04-20 18:44.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.42 [info     ] FQE_20220420184434: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001757805605968797, 'time_algorithm_update': 0.005036249218216862, 'loss': 0.0036507508414516964, 'time_step': 0.005290893187005836, 'init_value': -0.6906998157501221, 'ave_value': -0.47714998170099987, 'soft_opc': nan} step=1328




2022-04-20 18:44.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.43 [info     ] FQE_20220420184434: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001722444970923734, 'time_algorithm_update': 0.005113495401589267, 'loss': 0.003466411077849432, 'time_step': 0.005364620541951743, 'init_value': -0.7377333641052246, 'ave_value': -0.5139057136115592, 'soft_opc': nan} step=1494




2022-04-20 18:44.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.44 [info     ] FQE_20220420184434: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016690449542309865, 'time_algorithm_update': 0.0051010459302419636, 'loss': 0.003680924124768879, 'time_step': 0.0053427506642169265, 'init_value': -0.7989842891693115, 'ave_value': -0.57643324093524, 'soft_opc': nan} step=1660




2022-04-20 18:44.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.45 [info     ] FQE_20220420184434: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00017137843442250448, 'time_algorithm_update': 0.005096546138625547, 'loss': 0.003680045853918755, 'time_step': 0.005341531282447907, 'init_value': -0.868943452835083, 'ave_value': -0.6415959208054249, 'soft_opc': nan} step=1826




2022-04-20 18:44.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.46 [info     ] FQE_20220420184434: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016880035400390625, 'time_algorithm_update': 0.0050718324730195195, 'loss': 0.0037482074021462754, 'time_step': 0.0053153784878282664, 'init_value': -0.9224658608436584, 'ave_value': -0.6795581295507381, 'soft_opc': nan} step=1992




2022-04-20 18:44.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.47 [info     ] FQE_20220420184434: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00017397805868861187, 'time_algorithm_update': 0.005025106740285115, 'loss': 0.004278002366078569, 'time_step': 0.005275075694164598, 'init_value': -0.9855806827545166, 'ave_value': -0.7411543575778335, 'soft_opc': nan} step=2158




2022-04-20 18:44.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.48 [info     ] FQE_20220420184434: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00017015187137098197, 'time_algorithm_update': 0.005147350839821689, 'loss': 0.0043296116030444, 'time_step': 0.005393467753766531, 'init_value': -1.0301826000213623, 'ave_value': -0.7657866659364453, 'soft_opc': nan} step=2324




2022-04-20 18:44.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.49 [info     ] FQE_20220420184434: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017036874610257437, 'time_algorithm_update': 0.004602320222969514, 'loss': 0.004603550905790965, 'time_step': 0.004849906427314483, 'init_value': -1.1382721662521362, 'ave_value': -0.8525067528609086, 'soft_opc': nan} step=2490




2022-04-20 18:44.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.49 [info     ] FQE_20220420184434: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016917378069406533, 'time_algorithm_update': 0.005056925566799669, 'loss': 0.005137511510093394, 'time_step': 0.005303425961230175, 'init_value': -1.2613558769226074, 'ave_value': -0.9625683584711976, 'soft_opc': nan} step=2656




2022-04-20 18:44.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.50 [info     ] FQE_20220420184434: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001705540231911533, 'time_algorithm_update': 0.005122631429189659, 'loss': 0.0055272390197048035, 'time_step': 0.005370898419115917, 'init_value': -1.3743432760238647, 'ave_value': -1.068093488915815, 'soft_opc': nan} step=2822




2022-04-20 18:44.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.51 [info     ] FQE_20220420184434: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016781651830098717, 'time_algorithm_update': 0.005098183471036245, 'loss': 0.0059281851207775745, 'time_step': 0.0053382465638310075, 'init_value': -1.4044054746627808, 'ave_value': -1.069837730904823, 'soft_opc': nan} step=2988




2022-04-20 18:44.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.52 [info     ] FQE_20220420184434: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016960753015725008, 'time_algorithm_update': 0.005139731499085943, 'loss': 0.006608106196978612, 'time_step': 0.005383454173444265, 'init_value': -1.5067775249481201, 'ave_value': -1.1612037736754697, 'soft_opc': nan} step=3154




2022-04-20 18:44.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.53 [info     ] FQE_20220420184434: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016689156911459314, 'time_algorithm_update': 0.0050396861800228255, 'loss': 0.007213340400359088, 'time_step': 0.005283374384225133, 'init_value': -1.6341640949249268, 'ave_value': -1.2557426625225054, 'soft_opc': nan} step=3320




2022-04-20 18:44.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.54 [info     ] FQE_20220420184434: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016777917563197124, 'time_algorithm_update': 0.00504176300692271, 'loss': 0.0077264745897159025, 'time_step': 0.005283818187483822, 'init_value': -1.6752676963806152, 'ave_value': -1.277107505025426, 'soft_opc': nan} step=3486




2022-04-20 18:44.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.55 [info     ] FQE_20220420184434: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016911633043404086, 'time_algorithm_update': 0.004988536777266537, 'loss': 0.008550245506290615, 'time_step': 0.005232845444277108, 'init_value': -1.7381446361541748, 'ave_value': -1.3010643235466501, 'soft_opc': nan} step=3652




2022-04-20 18:44.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.56 [info     ] FQE_20220420184434: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017637804330113423, 'time_algorithm_update': 0.0052158703286963775, 'loss': 0.009205384235352233, 'time_step': 0.005464229239038674, 'init_value': -1.90132737159729, 'ave_value': -1.4677227649636366, 'soft_opc': nan} step=3818




2022-04-20 18:44.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.57 [info     ] FQE_20220420184434: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016917952572006778, 'time_algorithm_update': 0.004390445100255759, 'loss': 0.009732674431414848, 'time_step': 0.004642714937049222, 'init_value': -1.9364378452301025, 'ave_value': -1.4806252897792571, 'soft_opc': nan} step=3984




2022-04-20 18:44.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.58 [info     ] FQE_20220420184434: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016781077327498472, 'time_algorithm_update': 0.005021231720246464, 'loss': 0.010130381605767432, 'time_step': 0.005260546523404409, 'init_value': -1.9721500873565674, 'ave_value': -1.4918770164947721, 'soft_opc': nan} step=4150




2022-04-20 18:44.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:44.59 [info     ] FQE_20220420184434: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017040178000208843, 'time_algorithm_update': 0.0049092180757637485, 'loss': 0.010929827784196508, 'time_step': 0.0051562527576124815, 'init_value': -2.0122790336608887, 'ave_value': -1.5165393409889762, 'soft_opc': nan} step=4316




2022-04-20 18:44.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.00 [info     ] FQE_20220420184434: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00017278740204960467, 'time_algorithm_update': 0.005116222852683929, 'loss': 0.011127893973551467, 'time_step': 0.005362039589020143, 'init_value': -2.1002936363220215, 'ave_value': -1.5924602665938437, 'soft_opc': nan} step=4482




2022-04-20 18:45.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.01 [info     ] FQE_20220420184434: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017308901591473315, 'time_algorithm_update': 0.005098352949303317, 'loss': 0.012081460929784295, 'time_step': 0.0053470838500792725, 'init_value': -2.1850976943969727, 'ave_value': -1.6537246616448167, 'soft_opc': nan} step=4648




2022-04-20 18:45.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.02 [info     ] FQE_20220420184434: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00017056264073015694, 'time_algorithm_update': 0.00494701603809035, 'loss': 0.013006290507850697, 'time_step': 0.005193820918898985, 'init_value': -2.300529718399048, 'ave_value': -1.7514965306933936, 'soft_opc': nan} step=4814




2022-04-20 18:45.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.03 [info     ] FQE_20220420184434: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017252025834049088, 'time_algorithm_update': 0.0049809892493558215, 'loss': 0.013395267836777606, 'time_step': 0.005230423915817077, 'init_value': -2.3429455757141113, 'ave_value': -1.7817062626315936, 'soft_opc': nan} step=4980




2022-04-20 18:45.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.04 [info     ] FQE_20220420184434: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001735931419464479, 'time_algorithm_update': 0.005108929542173822, 'loss': 0.01431825192272102, 'time_step': 0.005355839269707002, 'init_value': -2.4654123783111572, 'ave_value': -1.8853628704408267, 'soft_opc': nan} step=5146




2022-04-20 18:45.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.05 [info     ] FQE_20220420184434: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016735691622079136, 'time_algorithm_update': 0.005058001322918628, 'loss': 0.015044001151291451, 'time_step': 0.005298108939664909, 'init_value': -2.602606773376465, 'ave_value': -1.9943303463426796, 'soft_opc': nan} step=5312




2022-04-20 18:45.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.06 [info     ] FQE_20220420184434: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001687730651303946, 'time_algorithm_update': 0.005089873290923704, 'loss': 0.016202405849696105, 'time_step': 0.005334263824554811, 'init_value': -2.618509292602539, 'ave_value': -2.0004644636836675, 'soft_opc': nan} step=5478




2022-04-20 18:45.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.07 [info     ] FQE_20220420184434: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.000161456774516278, 'time_algorithm_update': 0.004120792251035392, 'loss': 0.016558294606686806, 'time_step': 0.00435316706278238, 'init_value': -2.6451773643493652, 'ave_value': -2.0389415152068877, 'soft_opc': nan} step=5644




2022-04-20 18:45.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.08 [info     ] FQE_20220420184434: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016790269369102386, 'time_algorithm_update': 0.005151090851749282, 'loss': 0.017463322949780893, 'time_step': 0.005391261663781591, 'init_value': -2.9198098182678223, 'ave_value': -2.297385999298579, 'soft_opc': nan} step=5810




2022-04-20 18:45.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.09 [info     ] FQE_20220420184434: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016813249473112175, 'time_algorithm_update': 0.005102283983345491, 'loss': 0.01789554148065263, 'time_step': 0.005344268787338073, 'init_value': -2.9396073818206787, 'ave_value': -2.278829251367364, 'soft_opc': nan} step=5976




2022-04-20 18:45.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.09 [info     ] FQE_20220420184434: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001706516886331949, 'time_algorithm_update': 0.00502469597092594, 'loss': 0.019062576027227992, 'time_step': 0.005266893340880613, 'init_value': -3.0201029777526855, 'ave_value': -2.3211602356301637, 'soft_opc': nan} step=6142




2022-04-20 18:45.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.10 [info     ] FQE_20220420184434: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016600252634071442, 'time_algorithm_update': 0.005044269274516278, 'loss': 0.01959048767422651, 'time_step': 0.0052879862038485975, 'init_value': -3.1045644283294678, 'ave_value': -2.395823950656094, 'soft_opc': nan} step=6308




2022-04-20 18:45.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.11 [info     ] FQE_20220420184434: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016823016017316337, 'time_algorithm_update': 0.005026599010789251, 'loss': 0.020367643806426954, 'time_step': 0.00526677125907806, 'init_value': -3.1432504653930664, 'ave_value': -2.3958336200095243, 'soft_opc': nan} step=6474




2022-04-20 18:45.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.12 [info     ] FQE_20220420184434: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001691220754600433, 'time_algorithm_update': 0.005090990698481181, 'loss': 0.020643393881478716, 'time_step': 0.0053353510707257745, 'init_value': -3.1813883781433105, 'ave_value': -2.4416757990446714, 'soft_opc': nan} step=6640




2022-04-20 18:45.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.13 [info     ] FQE_20220420184434: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016749192433184888, 'time_algorithm_update': 0.004991653453872864, 'loss': 0.02079575638502769, 'time_step': 0.005230050489126918, 'init_value': -3.206875801086426, 'ave_value': -2.4214939878934674, 'soft_opc': nan} step=6806




2022-04-20 18:45.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.14 [info     ] FQE_20220420184434: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017129369528896837, 'time_algorithm_update': 0.0049418842936136635, 'loss': 0.021444173371952296, 'time_step': 0.00518488883972168, 'init_value': -3.276148796081543, 'ave_value': -2.4528094522285837, 'soft_opc': nan} step=6972




2022-04-20 18:45.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.15 [info     ] FQE_20220420184434: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001693288963961314, 'time_algorithm_update': 0.0044802154403135, 'loss': 0.021532279861451363, 'time_step': 0.004720771169087973, 'init_value': -3.3982481956481934, 'ave_value': -2.5385138735544306, 'soft_opc': nan} step=7138




2022-04-20 18:45.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.16 [info     ] FQE_20220420184434: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016775619552796147, 'time_algorithm_update': 0.005007379026298064, 'loss': 0.02218678355755576, 'time_step': 0.005246820220028062, 'init_value': -3.5109524726867676, 'ave_value': -2.6691281762913808, 'soft_opc': nan} step=7304




2022-04-20 18:45.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.17 [info     ] FQE_20220420184434: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00017309906971023744, 'time_algorithm_update': 0.005157697631652097, 'loss': 0.023412384755533547, 'time_step': 0.005407666585531579, 'init_value': -3.537318229675293, 'ave_value': -2.6732268097969865, 'soft_opc': nan} step=7470




2022-04-20 18:45.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.18 [info     ] FQE_20220420184434: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016834649694971292, 'time_algorithm_update': 0.004988147551754871, 'loss': 0.024841731115446304, 'time_step': 0.0052312454545354265, 'init_value': -3.630552053451538, 'ave_value': -2.72025701280784, 'soft_opc': nan} step=7636




2022-04-20 18:45.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.19 [info     ] FQE_20220420184434: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016660431781447078, 'time_algorithm_update': 0.005109853055103716, 'loss': 0.025150300316366715, 'time_step': 0.005346783672470644, 'init_value': -3.6989755630493164, 'ave_value': -2.8096394890936107, 'soft_opc': nan} step=7802




2022-04-20 18:45.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.20 [info     ] FQE_20220420184434: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016823446894266518, 'time_algorithm_update': 0.005041912377598774, 'loss': 0.025513992393080772, 'time_step': 0.005284677068871188, 'init_value': -3.775289297103882, 'ave_value': -2.8482125526176647, 'soft_opc': nan} step=7968




2022-04-20 18:45.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.21 [info     ] FQE_20220420184434: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016695620065712067, 'time_algorithm_update': 0.005106055592916098, 'loss': 0.027007851238412983, 'time_step': 0.0053439384483429326, 'init_value': -3.855255126953125, 'ave_value': -2.9215432012201967, 'soft_opc': nan} step=8134




2022-04-20 18:45.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:45.22 [info     ] FQE_20220420184434: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.000169060316430517, 'time_algorithm_update': 0.005126756357859416, 'loss': 0.02744849258402632, 'time_step': 0.005372344729412033, 'init_value': -3.9011802673339844, 'ave_value': -2.9503881720708565, 'soft_opc': nan} step=8300




2022-04-20 18:45.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184434/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:45.22 [info     ] Directory is created at d3rlpy_logs/FQE_20220420184522
2022-04-20 18:45.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:45.22 [debug    ] Building models...
2022-04-20 18:45.22 [debug    ] Models have been built.
2022-04-20 18:45.22 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420184522/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:45.24 [info     ] FQE_20220420184522: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016542922618777254, 'time_algorithm_update': 0.004692709723184275, 'loss': 0.029706516227316716, 'time_step': 0.004934378141580626, 'init_value': -0.9783750772476196, 'ave_value': -1.0078053983213666, 'soft_opc': nan} step=344




2022-04-20 18:45.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.26 [info     ] FQE_20220420184522: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016796034435893214, 'time_algorithm_update': 0.005080760911453602, 'loss': 0.02627862932236302, 'time_step': 0.0053253624328347135, 'init_value': -1.568267583847046, 'ave_value': -1.6026724478615835, 'soft_opc': nan} step=688




2022-04-20 18:45.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.28 [info     ] FQE_20220420184522: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001699383868727573, 'time_algorithm_update': 0.0050191324810649075, 'loss': 0.03078600236886116, 'time_step': 0.005266931167868681, 'init_value': -2.382495403289795, 'ave_value': -2.445685887957613, 'soft_opc': nan} step=1032




2022-04-20 18:45.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.30 [info     ] FQE_20220420184522: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017659054246059683, 'time_algorithm_update': 0.0051141221855962, 'loss': 0.035725114628845867, 'time_step': 0.005368339460949565, 'init_value': -2.717045307159424, 'ave_value': -2.8166559895315952, 'soft_opc': nan} step=1376




2022-04-20 18:45.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.32 [info     ] FQE_20220420184522: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001691101595412853, 'time_algorithm_update': 0.0050592263077580656, 'loss': 0.04523226198047226, 'time_step': 0.005301250967868539, 'init_value': -3.3743481636047363, 'ave_value': -3.4856237679132605, 'soft_opc': nan} step=1720




2022-04-20 18:45.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.34 [info     ] FQE_20220420184522: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017563825429872025, 'time_algorithm_update': 0.004692149716754293, 'loss': 0.05569035881794556, 'time_step': 0.004947108584781026, 'init_value': -3.757711887359619, 'ave_value': -3.860466304176428, 'soft_opc': nan} step=2064




2022-04-20 18:45.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.36 [info     ] FQE_20220420184522: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017161424769911656, 'time_algorithm_update': 0.005112652168717495, 'loss': 0.07073895548751881, 'time_step': 0.005362022061680638, 'init_value': -4.493102073669434, 'ave_value': -4.616511290538259, 'soft_opc': nan} step=2408




2022-04-20 18:45.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.38 [info     ] FQE_20220420184522: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017397556194039278, 'time_algorithm_update': 0.005125643208969471, 'loss': 0.08858127075756445, 'time_step': 0.005377790955610053, 'init_value': -4.596977233886719, 'ave_value': -4.739581853214039, 'soft_opc': nan} step=2752




2022-04-20 18:45.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.40 [info     ] FQE_20220420184522: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001700395761534225, 'time_algorithm_update': 0.0050635185352591585, 'loss': 0.10675460531181374, 'time_step': 0.005307654308718305, 'init_value': -4.909976959228516, 'ave_value': -5.08860472859563, 'soft_opc': nan} step=3096




2022-04-20 18:45.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.42 [info     ] FQE_20220420184522: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001755356788635254, 'time_algorithm_update': 0.005024772743846095, 'loss': 0.1331446171564938, 'time_step': 0.005280936873236368, 'init_value': -5.353511810302734, 'ave_value': -5.529629515676323, 'soft_opc': nan} step=3440




2022-04-20 18:45.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.44 [info     ] FQE_20220420184522: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016699211541996447, 'time_algorithm_update': 0.004656435445297596, 'loss': 0.15393156570187488, 'time_step': 0.004897859900496727, 'init_value': -5.501568794250488, 'ave_value': -5.6731728748508585, 'soft_opc': nan} step=3784




2022-04-20 18:45.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.46 [info     ] FQE_20220420184522: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001775054044501726, 'time_algorithm_update': 0.00510485851487448, 'loss': 0.18057611694039646, 'time_step': 0.005360922841138618, 'init_value': -5.833868026733398, 'ave_value': -6.067976537713496, 'soft_opc': nan} step=4128




2022-04-20 18:45.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.48 [info     ] FQE_20220420184522: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001751704271449599, 'time_algorithm_update': 0.005048466283221578, 'loss': 0.202453528996557, 'time_step': 0.005301096411638482, 'init_value': -5.961180686950684, 'ave_value': -6.271716483159889, 'soft_opc': nan} step=4472




2022-04-20 18:45.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.50 [info     ] FQE_20220420184522: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017500131629234137, 'time_algorithm_update': 0.005064675974291425, 'loss': 0.2297462641131566, 'time_step': 0.005316595005434613, 'init_value': -6.357089519500732, 'ave_value': -6.564870435598743, 'soft_opc': nan} step=4816




2022-04-20 18:45.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.51 [info     ] FQE_20220420184522: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001702759155007296, 'time_algorithm_update': 0.004620820976967035, 'loss': 0.25568938828181736, 'time_step': 0.004865891018579173, 'init_value': -6.408059597015381, 'ave_value': -6.74047287973483, 'soft_opc': nan} step=5160




2022-04-20 18:45.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.53 [info     ] FQE_20220420184522: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001618390859559525, 'time_algorithm_update': 0.00497797686000203, 'loss': 0.27931309790826986, 'time_step': 0.005209558924963308, 'init_value': -6.64677619934082, 'ave_value': -7.020957869886177, 'soft_opc': nan} step=5504




2022-04-20 18:45.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.55 [info     ] FQE_20220420184522: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001587632090546364, 'time_algorithm_update': 0.0049414960450904315, 'loss': 0.31395711088661366, 'time_step': 0.005170266988665559, 'init_value': -6.758044242858887, 'ave_value': -7.280293688374395, 'soft_opc': nan} step=5848




2022-04-20 18:45.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.57 [info     ] FQE_20220420184522: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00014460987822954044, 'time_algorithm_update': 0.00474701301996098, 'loss': 0.33936927696624986, 'time_step': 0.004956341760103093, 'init_value': -6.788761138916016, 'ave_value': -7.330969287876484, 'soft_opc': nan} step=6192




2022-04-20 18:45.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:45.59 [info     ] FQE_20220420184522: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00014934983364371367, 'time_algorithm_update': 0.004884462023890296, 'loss': 0.37227953595737384, 'time_step': 0.005099773406982422, 'init_value': -6.911093235015869, 'ave_value': -7.606771377959029, 'soft_opc': nan} step=6536




2022-04-20 18:45.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.01 [info     ] FQE_20220420184522: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001521394696346549, 'time_algorithm_update': 0.004567521256069804, 'loss': 0.40367173050465277, 'time_step': 0.004787528930708419, 'init_value': -7.241351127624512, 'ave_value': -8.037961872479132, 'soft_opc': nan} step=6880




2022-04-20 18:46.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.03 [info     ] FQE_20220420184522: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015653565872547238, 'time_algorithm_update': 0.0049272678619207335, 'loss': 0.4443246407520979, 'time_step': 0.005152073710463768, 'init_value': -7.145563125610352, 'ave_value': -8.06771818575227, 'soft_opc': nan} step=7224




2022-04-20 18:46.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.05 [info     ] FQE_20220420184522: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001690983772277832, 'time_algorithm_update': 0.005033371753470842, 'loss': 0.479982428122745, 'time_step': 0.005280586176140364, 'init_value': -7.1922287940979, 'ave_value': -8.320243996905313, 'soft_opc': nan} step=7568




2022-04-20 18:46.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.07 [info     ] FQE_20220420184522: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017273703286814135, 'time_algorithm_update': 0.005034019087636193, 'loss': 0.4906967672530215, 'time_step': 0.005282365998556447, 'init_value': -6.864431381225586, 'ave_value': -8.141821124624494, 'soft_opc': nan} step=7912




2022-04-20 18:46.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.08 [info     ] FQE_20220420184522: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017208207485287688, 'time_algorithm_update': 0.004641939734303674, 'loss': 0.5082643993068919, 'time_step': 0.004890215951342916, 'init_value': -6.728619575500488, 'ave_value': -8.200812555860459, 'soft_opc': nan} step=8256




2022-04-20 18:46.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.10 [info     ] FQE_20220420184522: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001727287159409634, 'time_algorithm_update': 0.005057020242824111, 'loss': 0.5339892654744691, 'time_step': 0.005307194105414457, 'init_value': -7.083972930908203, 'ave_value': -8.639716297551688, 'soft_opc': nan} step=8600




2022-04-20 18:46.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.12 [info     ] FQE_20220420184522: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017439556676287983, 'time_algorithm_update': 0.0050868897937064944, 'loss': 0.5541284533737357, 'time_step': 0.005338650110156037, 'init_value': -7.200300216674805, 'ave_value': -8.888023961069448, 'soft_opc': nan} step=8944




2022-04-20 18:46.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.14 [info     ] FQE_20220420184522: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017217633336089378, 'time_algorithm_update': 0.00499446100966875, 'loss': 0.575565689037619, 'time_step': 0.005239163027253262, 'init_value': -7.461860179901123, 'ave_value': -9.298409352486802, 'soft_opc': nan} step=9288




2022-04-20 18:46.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.16 [info     ] FQE_20220420184522: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001769557941791623, 'time_algorithm_update': 0.005002868729968404, 'loss': 0.5881238885137231, 'time_step': 0.005256591841231945, 'init_value': -7.431018352508545, 'ave_value': -9.234560668731511, 'soft_opc': nan} step=9632




2022-04-20 18:46.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.18 [info     ] FQE_20220420184522: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017340169396511343, 'time_algorithm_update': 0.004673032566558483, 'loss': 0.6057799558350166, 'time_step': 0.004921337199765582, 'init_value': -7.742276668548584, 'ave_value': -9.557790430835741, 'soft_opc': nan} step=9976




2022-04-20 18:46.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.20 [info     ] FQE_20220420184522: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017979949019676032, 'time_algorithm_update': 0.005162983439689459, 'loss': 0.6176792041961711, 'time_step': 0.005418814198915349, 'init_value': -7.961648941040039, 'ave_value': -9.787888783216477, 'soft_opc': nan} step=10320




2022-04-20 18:46.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.22 [info     ] FQE_20220420184522: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017684143643046534, 'time_algorithm_update': 0.005067993042080901, 'loss': 0.6217777679002908, 'time_step': 0.005323005277057027, 'init_value': -8.13920783996582, 'ave_value': -9.920743790886844, 'soft_opc': nan} step=10664




2022-04-20 18:46.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.24 [info     ] FQE_20220420184522: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001774173836375392, 'time_algorithm_update': 0.005052603261415349, 'loss': 0.6150583736165318, 'time_step': 0.005309594925059829, 'init_value': -8.365961074829102, 'ave_value': -10.208072669088404, 'soft_opc': nan} step=11008




2022-04-20 18:46.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.26 [info     ] FQE_20220420184522: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017836689949035645, 'time_algorithm_update': 0.004729236974272617, 'loss': 0.6129974533132342, 'time_step': 0.004984569410945094, 'init_value': -8.1643705368042, 'ave_value': -10.116705852450997, 'soft_opc': nan} step=11352




2022-04-20 18:46.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.28 [info     ] FQE_20220420184522: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001759757829266925, 'time_algorithm_update': 0.0051472686057867, 'loss': 0.6097568024577963, 'time_step': 0.005399602097134257, 'init_value': -8.612858772277832, 'ave_value': -10.382323449371416, 'soft_opc': nan} step=11696




2022-04-20 18:46.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.30 [info     ] FQE_20220420184522: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017614073531572208, 'time_algorithm_update': 0.005064598349637763, 'loss': 0.5992155014753862, 'time_step': 0.005318240370861319, 'init_value': -8.27773666381836, 'ave_value': -9.837154059288688, 'soft_opc': nan} step=12040




2022-04-20 18:46.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.32 [info     ] FQE_20220420184522: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00018495390581530193, 'time_algorithm_update': 0.005098078833069912, 'loss': 0.5794602143903111, 'time_step': 0.005361918793168179, 'init_value': -8.403332710266113, 'ave_value': -9.894205575411602, 'soft_opc': nan} step=12384




2022-04-20 18:46.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.34 [info     ] FQE_20220420184522: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001789712628652883, 'time_algorithm_update': 0.005126901837282403, 'loss': 0.5731923521474697, 'time_step': 0.005383180324421372, 'init_value': -8.752235412597656, 'ave_value': -10.023486744779422, 'soft_opc': nan} step=12728




2022-04-20 18:46.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.36 [info     ] FQE_20220420184522: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017995543258134708, 'time_algorithm_update': 0.004707754351371943, 'loss': 0.564972679699273, 'time_step': 0.004967363767845686, 'init_value': -8.95711612701416, 'ave_value': -10.17370909887517, 'soft_opc': nan} step=13072




2022-04-20 18:46.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.38 [info     ] FQE_20220420184522: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017502210861028626, 'time_algorithm_update': 0.005118836497151574, 'loss': 0.5632482040565201, 'time_step': 0.0053711339484813604, 'init_value': -9.37618637084961, 'ave_value': -10.292659112059319, 'soft_opc': nan} step=13416




2022-04-20 18:46.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.40 [info     ] FQE_20220420184522: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017661202785580656, 'time_algorithm_update': 0.005160801632459773, 'loss': 0.5536945743139746, 'time_step': 0.005415124255557393, 'init_value': -9.914179801940918, 'ave_value': -10.578534270821324, 'soft_opc': nan} step=13760




2022-04-20 18:46.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.42 [info     ] FQE_20220420184522: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017698282419249068, 'time_algorithm_update': 0.005099399145259414, 'loss': 0.5575151586212045, 'time_step': 0.00535501851591953, 'init_value': -10.273069381713867, 'ave_value': -11.01892400977122, 'soft_opc': nan} step=14104




2022-04-20 18:46.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.44 [info     ] FQE_20220420184522: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017765233683031658, 'time_algorithm_update': 0.004855600207350975, 'loss': 0.5546721513762117, 'time_step': 0.005112045726110769, 'init_value': -10.357027053833008, 'ave_value': -10.825335194912702, 'soft_opc': nan} step=14448




2022-04-20 18:46.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.46 [info     ] FQE_20220420184522: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017882017202155534, 'time_algorithm_update': 0.005144015994182852, 'loss': 0.5535693556330231, 'time_step': 0.005400679832281068, 'init_value': -10.308122634887695, 'ave_value': -10.604316992546444, 'soft_opc': nan} step=14792




2022-04-20 18:46.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.48 [info     ] FQE_20220420184522: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017718035121296727, 'time_algorithm_update': 0.005037250213844832, 'loss': 0.5407669789720934, 'time_step': 0.005291336497595144, 'init_value': -10.509990692138672, 'ave_value': -10.73146927735311, 'soft_opc': nan} step=15136




2022-04-20 18:46.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.50 [info     ] FQE_20220420184522: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017492715702500453, 'time_algorithm_update': 0.005047398944233739, 'loss': 0.5468243516136914, 'time_step': 0.005297567955283231, 'init_value': -10.783439636230469, 'ave_value': -10.65377189168719, 'soft_opc': nan} step=15480




2022-04-20 18:46.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.52 [info     ] FQE_20220420184522: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017930740533873092, 'time_algorithm_update': 0.005014484704926957, 'loss': 0.5467720658161006, 'time_step': 0.005269345849059349, 'init_value': -11.249555587768555, 'ave_value': -11.098973827167343, 'soft_opc': nan} step=15824




2022-04-20 18:46.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.53 [info     ] FQE_20220420184522: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017672222714091456, 'time_algorithm_update': 0.004655158103898514, 'loss': 0.5609609368203078, 'time_step': 0.004907628131467242, 'init_value': -11.656232833862305, 'ave_value': -11.247468099737254, 'soft_opc': nan} step=16168




2022-04-20 18:46.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.55 [info     ] FQE_20220420184522: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001783149186954942, 'time_algorithm_update': 0.005054683879364369, 'loss': 0.5579815784978226, 'time_step': 0.0053122306978979775, 'init_value': -12.028613090515137, 'ave_value': -11.367931982384457, 'soft_opc': nan} step=16512




2022-04-20 18:46.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.57 [info     ] FQE_20220420184522: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001750228016875511, 'time_algorithm_update': 0.005050088777098545, 'loss': 0.5699918383169312, 'time_step': 0.00530169592347256, 'init_value': -12.480042457580566, 'ave_value': -11.554986505966156, 'soft_opc': nan} step=16856




2022-04-20 18:46.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:46.59 [info     ] FQE_20220420184522: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017960889394893202, 'time_algorithm_update': 0.005091074594231539, 'loss': 0.5842481728718985, 'time_step': 0.005347899226255195, 'init_value': -13.02946949005127, 'ave_value': -12.033459801004302, 'soft_opc': nan} step=17200




2022-04-20 18:46.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184522/model_17200.pt
search iteration:  39
using hyper params:  [0.008952639059189492, 0.0006120888437079064, 5.897384852206009e-05, 5]
2022-04-20 18:46.59 [debug    ] RoundIterator is selected.
2022-04-20 18:46.59 [info     ] Directory is created at d3rlpy_logs/TD3PlusBC_20220420184659
2022-04-20 18:46.59 [debug    ] Fitting scaler...              scaler=standard
2022-04-20 18:47.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:47.00 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:47.00 [info     ] Parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0089526390591

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.03 [info     ] TD3PlusBC_20220420184659: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00037587037560535454, 'time_algorithm_update': 0.00841528630396079, 'critic_loss': 22.167514356256227, 'actor_loss': 2.737385086148803, 'time_step': 0.008871960361101473, 'td_error': 1.0333894908730494, 'init_value': -7.548655033111572, 'ave_value': -4.542654662837803} step=342
2022-04-20 18:47.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.07 [info     ] TD3PlusBC_20220420184659: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00038471138268186334, 'time_algorithm_update': 0.009046053328709296, 'critic_loss': 3.897140021561182, 'actor_loss': 2.5807651539295042, 'time_step': 0.0095103693287275, 'td_error': 1.074877256078535, 'init_value': -10.989870071411133, 'ave_value': -6.808523355222912} step=684
2022-04-20 18:47.07 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.11 [info     ] TD3PlusBC_20220420184659: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00038329202529282597, 'time_algorithm_update': 0.009079844630949678, 'critic_loss': 5.047613866496504, 'actor_loss': 2.551082447955483, 'time_step': 0.009544851487142998, 'td_error': 1.2689397397587954, 'init_value': -14.661308288574219, 'ave_value': -9.150293641111704} step=1026
2022-04-20 18:47.11 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.14 [info     ] TD3PlusBC_20220420184659: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003800447921306766, 'time_algorithm_update': 0.008583381162052267, 'critic_loss': 6.488772907452277, 'actor_loss': 2.5434524999027364, 'time_step': 0.009044747603567023, 'td_error': 1.5552470367208102, 'init_value': -18.379920959472656, 'ave_value': -11.49926342595872} step=1368
2022-04-20 18:47.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.18 [info     ] TD3PlusBC_20220420184659: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003801626071595309, 'time_algorithm_update': 0.008861584970128466, 'critic_loss': 8.375323566776967, 'actor_loss': 2.5375218001025464, 'time_step': 0.009325893998843187, 'td_error': 1.9074881422694943, 'init_value': -22.081762313842773, 'ave_value': -13.81813673816789} step=1710
2022-04-20 18:47.18 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.22 [info     ] TD3PlusBC_20220420184659: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003804066027814185, 'time_algorithm_update': 0.008562071281566955, 'critic_loss': 10.366540561642562, 'actor_loss': 2.5335028534047086, 'time_step': 0.00901931419707181, 'td_error': 2.342233900161601, 'init_value': -25.67618751525879, 'ave_value': -16.109856718028475} step=2052
2022-04-20 18:47.22 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.25 [info     ] TD3PlusBC_20220420184659: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00038350883283113176, 'time_algorithm_update': 0.0090223467140867, 'critic_loss': 12.58919098084433, 'actor_loss': 2.530738762247632, 'time_step': 0.009476207850272195, 'td_error': 2.8168189383702287, 'init_value': -29.16274070739746, 'ave_value': -18.293499925974505} step=2394
2022-04-20 18:47.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.29 [info     ] TD3PlusBC_20220420184659: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003835785458659568, 'time_algorithm_update': 0.009027812216016982, 'critic_loss': 14.836114244851453, 'actor_loss': 2.5288595748923677, 'time_step': 0.009487469991048178, 'td_error': 3.3174576952666746, 'init_value': -32.4865608215332, 'ave_value': -20.44877054660233} step=2736
2022-04-20 18:47.29 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.33 [info     ] TD3PlusBC_20220420184659: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003786512285645245, 'time_algorithm_update': 0.008802457859641627, 'critic_loss': 17.083835175162868, 'actor_loss': 2.5274565847296464, 'time_step': 0.009256528832061945, 'td_error': 3.8728740723586115, 'init_value': -35.61854553222656, 'ave_value': -22.46186651942723} step=3078
2022-04-20 18:47.33 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.37 [info     ] TD3PlusBC_20220420184659: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003814718179535448, 'time_algorithm_update': 0.008957887253566095, 'critic_loss': 19.499981638980888, 'actor_loss': 2.5257258610418667, 'time_step': 0.00941524658983911, 'td_error': 4.460144211165957, 'init_value': -38.823673248291016, 'ave_value': -24.555336803157672} step=3420
2022-04-20 18:47.37 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.40 [info     ] TD3PlusBC_20220420184659: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003769565046879283, 'time_algorithm_update': 0.008717036386679488, 'critic_loss': 21.847694769240263, 'actor_loss': 2.525784808989854, 'time_step': 0.009171535397133631, 'td_error': 5.032486473156204, 'init_value': -41.71092987060547, 'ave_value': -26.445704800684997} step=3762
2022-04-20 18:47.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.44 [info     ] TD3PlusBC_20220420184659: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003821563999555264, 'time_algorithm_update': 0.009043080067774009, 'critic_loss': 23.873294071844448, 'actor_loss': 2.5242957706339877, 'time_step': 0.009503594616003204, 'td_error': 5.630244319567554, 'init_value': -44.333534240722656, 'ave_value': -28.142666516306868} step=4104
2022-04-20 18:47.44 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.48 [info     ] TD3PlusBC_20220420184659: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003835701803017778, 'time_algorithm_update': 0.009004317529020254, 'critic_loss': 26.092796080293713, 'actor_loss': 2.524388293773807, 'time_step': 0.0094658673158166, 'td_error': 6.222374352413666, 'init_value': -47.04578399658203, 'ave_value': -29.90258535597079} step=4446
2022-04-20 18:47.48 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.51 [info     ] TD3PlusBC_20220420184659: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00038275523492467333, 'time_algorithm_update': 0.008604057351051018, 'critic_loss': 28.371237607030142, 'actor_loss': 2.5246853730831926, 'time_step': 0.009067085054185655, 'td_error': 6.856743733365721, 'init_value': -49.58308029174805, 'ave_value': -31.605197788894195} step=4788
2022-04-20 18:47.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.55 [info     ] TD3PlusBC_20220420184659: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00038683135607089214, 'time_algorithm_update': 0.008943659520288657, 'critic_loss': 30.5876682264763, 'actor_loss': 2.5238704402544343, 'time_step': 0.009410700602838171, 'td_error': 7.531590852049292, 'init_value': -52.192787170410156, 'ave_value': -33.2966835944769} step=5130
2022-04-20 18:47.55 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.59 [info     ] TD3PlusBC_20220420184659: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003893019860250908, 'time_algorithm_update': 0.008887671587759988, 'critic_loss': 32.4022352095933, 'actor_loss': 2.522841537207888, 'time_step': 0.009354722430134377, 'td_error': 8.053164598796148, 'init_value': -54.435081481933594, 'ave_value': -34.76219949261302} step=5472
2022-04-20 18:47.59 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.02 [info     ] TD3PlusBC_20220420184659: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003792033558003387, 'time_algorithm_update': 0.008817759173655371, 'critic_loss': 34.233587376555505, 'actor_loss': 2.521806758746766, 'time_step': 0.009272871658815974, 'td_error': 8.69645523136899, 'init_value': -56.66144943237305, 'ave_value': -36.24486824333936} step=5814
2022-04-20 18:48.03 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.06 [info     ] TD3PlusBC_20220420184659: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.000385468466240063, 'time_algorithm_update': 0.008976715350011636, 'critic_loss': 36.128564025923524, 'actor_loss': 2.522207444174248, 'time_step': 0.00943867644371345, 'td_error': 9.282193366907492, 'init_value': -58.78136444091797, 'ave_value': -37.684920362449496} step=6156
2022-04-20 18:48.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.10 [info     ] TD3PlusBC_20220420184659: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00038328087120725397, 'time_algorithm_update': 0.008703209503352294, 'critic_loss': 37.98505758820919, 'actor_loss': 2.521450801202428, 'time_step': 0.009165657891167535, 'td_error': 9.851192655526146, 'init_value': -60.81084060668945, 'ave_value': -39.020006956132804} step=6498
2022-04-20 18:48.10 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.14 [info     ] TD3PlusBC_20220420184659: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003839243225186889, 'time_algorithm_update': 0.008914448364436278, 'critic_loss': 39.710507353844, 'actor_loss': 2.5204159488454896, 'time_step': 0.009377067549186841, 'td_error': 10.417629418810403, 'init_value': -62.68608474731445, 'ave_value': -40.32501952334517} step=6840
2022-04-20 18:48.14 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.17 [info     ] TD3PlusBC_20220420184659: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003869303485803437, 'time_algorithm_update': 0.00900878474029184, 'critic_loss': 41.46476699316013, 'actor_loss': 2.521496958202786, 'time_step': 0.009473391443665265, 'td_error': 11.03350666410704, 'init_value': -64.73237609863281, 'ave_value': -41.6123150409206} step=7182
2022-04-20 18:48.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.21 [info     ] TD3PlusBC_20220420184659: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00038784219507585494, 'time_algorithm_update': 0.008643265356097305, 'critic_loss': 43.08260220533226, 'actor_loss': 2.5235120706390917, 'time_step': 0.009107342240406059, 'td_error': 11.56391344691686, 'init_value': -66.46739196777344, 'ave_value': -42.854250404867805} step=7524
2022-04-20 18:48.21 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.25 [info     ] TD3PlusBC_20220420184659: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00038400309824804116, 'time_algorithm_update': 0.009092386005914698, 'critic_loss': 44.64133143843266, 'actor_loss': 2.5221162664960004, 'time_step': 0.009556039732102065, 'td_error': 12.065170836917753, 'init_value': -67.93614959716797, 'ave_value': -43.89001733982533} step=7866
2022-04-20 18:48.25 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.28 [info     ] TD3PlusBC_20220420184659: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00038520983088086223, 'time_algorithm_update': 0.008596240428456088, 'critic_loss': 46.33149644505908, 'actor_loss': 2.5219761028624417, 'time_step': 0.009060509023610611, 'td_error': 12.6125005766294, 'init_value': -69.7658920288086, 'ave_value': -45.07576051813257} step=8208
2022-04-20 18:48.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.32 [info     ] TD3PlusBC_20220420184659: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003905484550877621, 'time_algorithm_update': 0.009019444560446935, 'critic_loss': 47.887482726783084, 'actor_loss': 2.5210365287044594, 'time_step': 0.009491649287485937, 'td_error': 13.056882802786275, 'init_value': -71.04582977294922, 'ave_value': -46.01253841913305} step=8550
2022-04-20 18:48.32 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.36 [info     ] TD3PlusBC_20220420184659: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00038103890000728137, 'time_algorithm_update': 0.008930568109478867, 'critic_loss': 49.48321831574914, 'actor_loss': 2.52029399983367, 'time_step': 0.009388137281986704, 'td_error': 13.539290168441992, 'init_value': -72.50365447998047, 'ave_value': -47.02240287697681} step=8892
2022-04-20 18:48.36 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.40 [info     ] TD3PlusBC_20220420184659: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00038161891245702554, 'time_algorithm_update': 0.008596923616197374, 'critic_loss': 51.163848369442235, 'actor_loss': 2.521780564771061, 'time_step': 0.009058504076729043, 'td_error': 13.874816045285508, 'init_value': -73.55912780761719, 'ave_value': -47.833521230486596} step=9234
2022-04-20 18:48.40 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.43 [info     ] TD3PlusBC_20220420184659: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003898143768310547, 'time_algorithm_update': 0.008958704290334244, 'critic_loss': 52.87841729950487, 'actor_loss': 2.520654460840058, 'time_step': 0.009430473310905591, 'td_error': 14.307758450385515, 'init_value': -75.00848388671875, 'ave_value': -48.77538797525958} step=9576
2022-04-20 18:48.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.47 [info     ] TD3PlusBC_20220420184659: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00037858918396353026, 'time_algorithm_update': 0.008936767689665856, 'critic_loss': 54.516362513715066, 'actor_loss': 2.522691453409474, 'time_step': 0.009395459939164725, 'td_error': 14.887073690323573, 'init_value': -76.55049896240234, 'ave_value': -49.8598249704098} step=9918
2022-04-20 18:48.47 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.51 [info     ] TD3PlusBC_20220420184659: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003814739093445895, 'time_algorithm_update': 0.00895179363719204, 'critic_loss': 56.43546296281424, 'actor_loss': 2.522773554450587, 'time_step': 0.009406827346623292, 'td_error': 15.230300782520107, 'init_value': -77.8007583618164, 'ave_value': -50.775356378896866} step=10260
2022-04-20 18:48.51 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.54 [info     ] TD3PlusBC_20220420184659: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003791998701485974, 'time_algorithm_update': 0.008870462925113433, 'critic_loss': 58.36258978035018, 'actor_loss': 2.5223531987931995, 'time_step': 0.009330901486134668, 'td_error': 15.626948087010085, 'init_value': -78.90899658203125, 'ave_value': -51.556905021448664} step=10602
2022-04-20 18:48.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.58 [info     ] TD3PlusBC_20220420184659: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003764950043973867, 'time_algorithm_update': 0.008492392406129, 'critic_loss': 60.105594395196924, 'actor_loss': 2.522655425713076, 'time_step': 0.008945807378891616, 'td_error': 15.90843479619706, 'init_value': -79.81700134277344, 'ave_value': -52.2092822341412} step=10944
2022-04-20 18:48.58 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.02 [info     ] TD3PlusBC_20220420184659: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003819863001505534, 'time_algorithm_update': 0.00896498125198989, 'critic_loss': 62.216235277945536, 'actor_loss': 2.523132672783924, 'time_step': 0.009423902160242983, 'td_error': 16.24438570365437, 'init_value': -80.86347961425781, 'ave_value': -53.005709136424436} step=11286
2022-04-20 18:49.02 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.05 [info     ] TD3PlusBC_20220420184659: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003813630656192177, 'time_algorithm_update': 0.009006536494918733, 'critic_loss': 64.61054282718234, 'actor_loss': 2.523100419351232, 'time_step': 0.009464215116891248, 'td_error': 16.53086162695486, 'init_value': -81.80186462402344, 'ave_value': -53.68947258093503} step=11628
2022-04-20 18:49.06 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.09 [info     ] TD3PlusBC_20220420184659: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003779234244809513, 'time_algorithm_update': 0.008711167943407918, 'critic_loss': 67.30185491996899, 'actor_loss': 2.5253443480932223, 'time_step': 0.009166905754490903, 'td_error': 16.7887016484196, 'init_value': -82.79899597167969, 'ave_value': -54.43695731167069} step=11970
2022-04-20 18:49.09 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.13 [info     ] TD3PlusBC_20220420184659: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00038509759289479396, 'time_algorithm_update': 0.008885449833340116, 'critic_loss': 69.93887186329268, 'actor_loss': 2.5250588815811783, 'time_step': 0.009348232146592169, 'td_error': 17.147974103733088, 'init_value': -84.02818298339844, 'ave_value': -55.151349340689094} step=12312
2022-04-20 18:49.13 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.17 [info     ] TD3PlusBC_20220420184659: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003765382264789782, 'time_algorithm_update': 0.00866000763854088, 'critic_loss': 72.33416777047498, 'actor_loss': 2.524211713445117, 'time_step': 0.009114576362029852, 'td_error': 17.23004431731703, 'init_value': -84.45433044433594, 'ave_value': -55.6903312770435} step=12654
2022-04-20 18:49.17 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.20 [info     ] TD3PlusBC_20220420184659: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00037685820930882504, 'time_algorithm_update': 0.009032310101023892, 'critic_loss': 75.42681812264068, 'actor_loss': 2.5245252938298455, 'time_step': 0.00948771189527902, 'td_error': 17.515045245429075, 'init_value': -85.58447265625, 'ave_value': -56.398553183201514} step=12996
2022-04-20 18:49.20 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.24 [info     ] TD3PlusBC_20220420184659: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00038208668692070143, 'time_algorithm_update': 0.008919998219138697, 'critic_loss': 78.09741681639912, 'actor_loss': 2.5260165574257836, 'time_step': 0.009383401675531042, 'td_error': 17.737081499791557, 'init_value': -86.21368408203125, 'ave_value': -56.981732098644784} step=13338
2022-04-20 18:49.24 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.28 [info     ] TD3PlusBC_20220420184659: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00038315887339631017, 'time_algorithm_update': 0.00862369830148262, 'critic_loss': 80.89713640938028, 'actor_loss': 2.5251048350194742, 'time_step': 0.00908497132753071, 'td_error': 17.923910334843143, 'init_value': -87.27032470703125, 'ave_value': -57.677515004086814} step=13680
2022-04-20 18:49.28 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.31 [info     ] TD3PlusBC_20220420184659: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037170850742630096, 'time_algorithm_update': 0.009001320565653127, 'critic_loss': 84.33284868831522, 'actor_loss': 2.525921619426437, 'time_step': 0.009448303116692437, 'td_error': 18.019096300587762, 'init_value': -87.67088317871094, 'ave_value': -58.00815994359117} step=14022
2022-04-20 18:49.31 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.35 [info     ] TD3PlusBC_20220420184659: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00038389155739232113, 'time_algorithm_update': 0.008648364167464407, 'critic_loss': 87.21980712846009, 'actor_loss': 2.5262172389448736, 'time_step': 0.00911368403518409, 'td_error': 18.205681739874827, 'init_value': -88.78913879394531, 'ave_value': -58.78280407361787} step=14364
2022-04-20 18:49.35 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.39 [info     ] TD3PlusBC_20220420184659: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00038629038292065, 'time_algorithm_update': 0.00906517491703145, 'critic_loss': 90.18471573946769, 'actor_loss': 2.5247907596721983, 'time_step': 0.009530832195839686, 'td_error': 18.369527982828416, 'init_value': -89.36014556884766, 'ave_value': -59.30864995402511} step=14706
2022-04-20 18:49.39 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.43 [info     ] TD3PlusBC_20220420184659: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00038360015690675256, 'time_algorithm_update': 0.008778576962431969, 'critic_loss': 93.09341828864918, 'actor_loss': 2.5272441105535854, 'time_step': 0.00924074231532582, 'td_error': 18.596736003757748, 'init_value': -90.42481994628906, 'ave_value': -60.013436508771065} step=15048
2022-04-20 18:49.43 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.46 [info     ] TD3PlusBC_20220420184659: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037539702409889267, 'time_algorithm_update': 0.008498678430479172, 'critic_loss': 96.38740546243233, 'actor_loss': 2.5275177690717907, 'time_step': 0.008948653064973174, 'td_error': 18.724731410971376, 'init_value': -90.8185806274414, 'ave_value': -60.28971513783903} step=15390
2022-04-20 18:49.46 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.50 [info     ] TD3PlusBC_20220420184659: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00038249171965303475, 'time_algorithm_update': 0.008977473827830533, 'critic_loss': 99.18060965287059, 'actor_loss': 2.5274133542824906, 'time_step': 0.009441140102364166, 'td_error': 18.82707545481957, 'init_value': -91.31355285644531, 'ave_value': -60.59640892779295} step=15732
2022-04-20 18:49.50 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.53 [info     ] TD3PlusBC_20220420184659: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00037688748878345155, 'time_algorithm_update': 0.008679813808865018, 'critic_loss': 102.36093953896685, 'actor_loss': 2.5286297128911603, 'time_step': 0.00911909516094721, 'td_error': 18.88364010967726, 'init_value': -91.9562759399414, 'ave_value': -61.09310803949481} step=16074
2022-04-20 18:49.54 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.57 [info     ] TD3PlusBC_20220420184659: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003787320956849215, 'time_algorithm_update': 0.008992674755074127, 'critic_loss': 105.21142960710135, 'actor_loss': 2.5290379775197884, 'time_step': 0.009434290099562261, 'td_error': 18.92797046535157, 'init_value': -92.21270751953125, 'ave_value': -61.46117503737225} step=16416
2022-04-20 18:49.57 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.01 [info     ] TD3PlusBC_20220420184659: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003775204831396627, 'time_algorithm_update': 0.009024098602651853, 'critic_loss': 107.98116485417238, 'actor_loss': 2.528932571411133, 'time_step': 0.009466520526952911, 'td_error': 19.1273702152558, 'init_value': -93.12538146972656, 'ave_value': -61.91649490573368} step=16758
2022-04-20 18:50.01 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.05 [info     ] TD3PlusBC_20220420184659: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.000381416744656033, 'time_algorithm_update': 0.008651368102135017, 'critic_loss': 110.93360543948168, 'actor_loss': 2.5284742082071583, 'time_step': 0.009099249254193222, 'td_error': 18.937155492515544, 'init_value': -92.95616149902344, 'ave_value': -62.06912066887804} step=17100
2022-04-20 18:50.05 [info     ] Model parameters are saved to d3rlpy_logs/TD3PlusBC_20220420184659/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:50.06 [info     ] FQE_20220420185005: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016726068703525038, 'time_algorithm_update': 0.0050129646278289424, 'loss': 0.008183092849486205, 'time_step': 0.005255704902740846, 'init_value': -0.16756592690944672, 'ave_value': -0.1286542433221732, 'soft_opc': nan} step=166




2022-04-20 18:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.07 [info     ] FQE_20220420185005: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016305532800145895, 'time_algorithm_update': 0.004966480186186641, 'loss': 0.00586790513774627, 'time_step': 0.005203054611941418, 'init_value': -0.30865952372550964, 'ave_value': -0.2192927084208743, 'soft_opc': nan} step=332




2022-04-20 18:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.08 [info     ] FQE_20220420185005: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016955582492322806, 'time_algorithm_update': 0.004985501967280744, 'loss': 0.005377808654490095, 'time_step': 0.005225099712969309, 'init_value': -0.3788558840751648, 'ave_value': -0.24596686611153387, 'soft_opc': nan} step=498




2022-04-20 18:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.09 [info     ] FQE_20220420185005: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017031847712505296, 'time_algorithm_update': 0.005104671041649508, 'loss': 0.005260737267805331, 'time_step': 0.005350469106651214, 'init_value': -0.45740753412246704, 'ave_value': -0.2752375652044508, 'soft_opc': nan} step=664




2022-04-20 18:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.10 [info     ] FQE_20220420185005: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016681257500705948, 'time_algorithm_update': 0.005060335239732122, 'loss': 0.004988565179799874, 'time_step': 0.0053036643798092765, 'init_value': -0.5336666107177734, 'ave_value': -0.31751649754860245, 'soft_opc': nan} step=830




2022-04-20 18:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.11 [info     ] FQE_20220420185005: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016816840114363706, 'time_algorithm_update': 0.005056954291929682, 'loss': 0.004644703758173588, 'time_step': 0.005298772490168193, 'init_value': -0.6156451106071472, 'ave_value': -0.3859656928090362, 'soft_opc': nan} step=996




2022-04-20 18:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.11 [info     ] FQE_20220420185005: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016685135393257602, 'time_algorithm_update': 0.004482899803713143, 'loss': 0.004391705925879349, 'time_step': 0.004718797752656132, 'init_value': -0.6566096544265747, 'ave_value': -0.39288535065915403, 'soft_opc': nan} step=1162




2022-04-20 18:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.12 [info     ] FQE_20220420185005: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001627206802368164, 'time_algorithm_update': 0.004843413111675216, 'loss': 0.004190350320268737, 'time_step': 0.005075654351567647, 'init_value': -0.7248015403747559, 'ave_value': -0.432115592197557, 'soft_opc': nan} step=1328




2022-04-20 18:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.13 [info     ] FQE_20220420185005: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016593645854168628, 'time_algorithm_update': 0.005075502108378583, 'loss': 0.00402239642066724, 'time_step': 0.005316267530602145, 'init_value': -0.7641178369522095, 'ave_value': -0.454473310747595, 'soft_opc': nan} step=1494




2022-04-20 18:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.14 [info     ] FQE_20220420185005: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001681281859616199, 'time_algorithm_update': 0.005068925489862281, 'loss': 0.003972838196555624, 'time_step': 0.005311875458223274, 'init_value': -0.8474990129470825, 'ave_value': -0.5111359211137971, 'soft_opc': nan} step=1660




2022-04-20 18:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.15 [info     ] FQE_20220420185005: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00017036874610257437, 'time_algorithm_update': 0.004946250513375524, 'loss': 0.003815917902014566, 'time_step': 0.005191169589398855, 'init_value': -0.9253636002540588, 'ave_value': -0.5582956545726137, 'soft_opc': nan} step=1826




2022-04-20 18:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.16 [info     ] FQE_20220420185005: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001691105854080384, 'time_algorithm_update': 0.005054723785584231, 'loss': 0.0037593064059682907, 'time_step': 0.0052999846906547085, 'init_value': -0.9707341194152832, 'ave_value': -0.5922585977879059, 'soft_opc': nan} step=1992




2022-04-20 18:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.17 [info     ] FQE_20220420185005: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016954002610172135, 'time_algorithm_update': 0.0050027255552360815, 'loss': 0.004092527160712753, 'time_step': 0.005246660795556493, 'init_value': -1.0276484489440918, 'ave_value': -0.62156962701539, 'soft_opc': nan} step=2158




2022-04-20 18:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.18 [info     ] FQE_20220420185005: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016728079462625893, 'time_algorithm_update': 0.005124590483056493, 'loss': 0.004204006918217897, 'time_step': 0.005367536142647985, 'init_value': -1.1382708549499512, 'ave_value': -0.7220929222366026, 'soft_opc': nan} step=2324




2022-04-20 18:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.19 [info     ] FQE_20220420185005: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017167573951813112, 'time_algorithm_update': 0.005066753870033356, 'loss': 0.0045319862876940205, 'time_step': 0.005314577056700925, 'init_value': -1.1874918937683105, 'ave_value': -0.7484061379363207, 'soft_opc': nan} step=2490




2022-04-20 18:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.20 [info     ] FQE_20220420185005: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00017402832766613328, 'time_algorithm_update': 0.005188373197992164, 'loss': 0.004417206495944186, 'time_step': 0.005436934620501047, 'init_value': -1.300611138343811, 'ave_value': -0.8484318348981843, 'soft_opc': nan} step=2656




2022-04-20 18:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.21 [info     ] FQE_20220420185005: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016406214380838783, 'time_algorithm_update': 0.0041577227144356235, 'loss': 0.0050295262516150815, 'time_step': 0.004396168582410698, 'init_value': -1.4336771965026855, 'ave_value': -0.9568741741279762, 'soft_opc': nan} step=2822




2022-04-20 18:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.22 [info     ] FQE_20220420185005: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001659766737237034, 'time_algorithm_update': 0.004974916756871235, 'loss': 0.005149971268768142, 'time_step': 0.00521784949015422, 'init_value': -1.4434891939163208, 'ave_value': -0.9617854727415351, 'soft_opc': nan} step=2988




2022-04-20 18:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.23 [info     ] FQE_20220420185005: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001675436295658709, 'time_algorithm_update': 0.005082354488143002, 'loss': 0.005767442662961759, 'time_step': 0.0053236024925507695, 'init_value': -1.5736827850341797, 'ave_value': -1.0401202614242966, 'soft_opc': nan} step=3154




2022-04-20 18:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.24 [info     ] FQE_20220420185005: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001634747148996376, 'time_algorithm_update': 0.0050494038915059655, 'loss': 0.006038391874284001, 'time_step': 0.00528902605355504, 'init_value': -1.646103858947754, 'ave_value': -1.1011590012558945, 'soft_opc': nan} step=3320




2022-04-20 18:50.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.25 [info     ] FQE_20220420185005: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016843985362225268, 'time_algorithm_update': 0.005065701094018407, 'loss': 0.006396607819142336, 'time_step': 0.005309400788272719, 'init_value': -1.6933501958847046, 'ave_value': -1.116014453767891, 'soft_opc': nan} step=3486




2022-04-20 18:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.26 [info     ] FQE_20220420185005: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00017013176377997342, 'time_algorithm_update': 0.005013511841555676, 'loss': 0.007134751592067082, 'time_step': 0.0052579282278037934, 'init_value': -1.8576164245605469, 'ave_value': -1.2316317656698326, 'soft_opc': nan} step=3652




2022-04-20 18:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.27 [info     ] FQE_20220420185005: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017175473362566476, 'time_algorithm_update': 0.004989405712449407, 'loss': 0.0072167012258432806, 'time_step': 0.005236431776759136, 'init_value': -1.895709753036499, 'ave_value': -1.287860751252722, 'soft_opc': nan} step=3818




2022-04-20 18:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.28 [info     ] FQE_20220420185005: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001685633716813053, 'time_algorithm_update': 0.00504490697240255, 'loss': 0.008145017339707723, 'time_step': 0.005288049399134624, 'init_value': -1.9732656478881836, 'ave_value': -1.3372317908415654, 'soft_opc': nan} step=3984




2022-04-20 18:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.29 [info     ] FQE_20220420185005: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016962907400475927, 'time_algorithm_update': 0.00494973918041551, 'loss': 0.008610173674154443, 'time_step': 0.005194428455398743, 'init_value': -2.138000011444092, 'ave_value': -1.4526372569218815, 'soft_opc': nan} step=4150




2022-04-20 18:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.29 [info     ] FQE_20220420185005: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016450307455407568, 'time_algorithm_update': 0.004126537277037839, 'loss': 0.009272602712875793, 'time_step': 0.004363436296761754, 'init_value': -2.237276077270508, 'ave_value': -1.5162442299898142, 'soft_opc': nan} step=4316




2022-04-20 18:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.30 [info     ] FQE_20220420185005: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016991919781788285, 'time_algorithm_update': 0.005116323390638972, 'loss': 0.009995881648992289, 'time_step': 0.005362068314150155, 'init_value': -2.279216766357422, 'ave_value': -1.541878391235127, 'soft_opc': nan} step=4482




2022-04-20 18:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.31 [info     ] FQE_20220420185005: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017009729362395872, 'time_algorithm_update': 0.004967867609966232, 'loss': 0.010234867716320309, 'time_step': 0.005214408219578755, 'init_value': -2.4367332458496094, 'ave_value': -1.651466104129816, 'soft_opc': nan} step=4648




2022-04-20 18:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.32 [info     ] FQE_20220420185005: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016954720738422438, 'time_algorithm_update': 0.005115397005196077, 'loss': 0.011043634414336229, 'time_step': 0.005361865801983569, 'init_value': -2.4746015071868896, 'ave_value': -1.6634867419966974, 'soft_opc': nan} step=4814




2022-04-20 18:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.33 [info     ] FQE_20220420185005: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017114288835640414, 'time_algorithm_update': 0.005000377275857581, 'loss': 0.011329158047965941, 'time_step': 0.005247983587793557, 'init_value': -2.5580453872680664, 'ave_value': -1.7194745335828614, 'soft_opc': nan} step=4980




2022-04-20 18:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.34 [info     ] FQE_20220420185005: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001701662339359881, 'time_algorithm_update': 0.005025167063058141, 'loss': 0.012137334504170367, 'time_step': 0.0052690276180405215, 'init_value': -2.667922019958496, 'ave_value': -1.784910731457241, 'soft_opc': nan} step=5146




2022-04-20 18:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.35 [info     ] FQE_20220420185005: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001689870673489858, 'time_algorithm_update': 0.004971603313124323, 'loss': 0.012808698033829814, 'time_step': 0.005213111279958702, 'init_value': -2.777008056640625, 'ave_value': -1.858083460716581, 'soft_opc': nan} step=5312




2022-04-20 18:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.36 [info     ] FQE_20220420185005: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016939783670816077, 'time_algorithm_update': 0.005036606846085514, 'loss': 0.013422314464731747, 'time_step': 0.005281866314899491, 'init_value': -2.8774845600128174, 'ave_value': -1.9242775054187053, 'soft_opc': nan} step=5478




2022-04-20 18:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.37 [info     ] FQE_20220420185005: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016838671213173005, 'time_algorithm_update': 0.004970671182655427, 'loss': 0.014262240125477898, 'time_step': 0.005212294050009854, 'init_value': -2.996119976043701, 'ave_value': -2.004434557289288, 'soft_opc': nan} step=5644




2022-04-20 18:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.38 [info     ] FQE_20220420185005: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017135114554899284, 'time_algorithm_update': 0.004790303218795593, 'loss': 0.014969805660882, 'time_step': 0.0050366183361375185, 'init_value': -3.166738748550415, 'ave_value': -2.1447240870143918, 'soft_opc': nan} step=5810




2022-04-20 18:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.39 [info     ] FQE_20220420185005: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016530019691191525, 'time_algorithm_update': 0.0030966623719916285, 'loss': 0.015500386376691287, 'time_step': 0.0033311068293559983, 'init_value': -3.211313247680664, 'ave_value': -2.1374080882667825, 'soft_opc': nan} step=5976




2022-04-20 18:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.39 [info     ] FQE_20220420185005: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016146395579878106, 'time_algorithm_update': 0.003588409308927605, 'loss': 0.016912042372298975, 'time_step': 0.0038224444331893003, 'init_value': -3.337716579437256, 'ave_value': -2.2702670480519, 'soft_opc': nan} step=6142




2022-04-20 18:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.40 [info     ] FQE_20220420185005: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016373036855674652, 'time_algorithm_update': 0.0035456174827483765, 'loss': 0.018435800391254968, 'time_step': 0.003784186868782503, 'init_value': -3.50911283493042, 'ave_value': -2.394912489067327, 'soft_opc': nan} step=6308




2022-04-20 18:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.41 [info     ] FQE_20220420185005: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016240183129368057, 'time_algorithm_update': 0.0035156097756810934, 'loss': 0.01870235369375245, 'time_step': 0.0037496018122477703, 'init_value': -3.614044189453125, 'ave_value': -2.47610219147157, 'soft_opc': nan} step=6474




2022-04-20 18:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.41 [info     ] FQE_20220420185005: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016229698456913592, 'time_algorithm_update': 0.003451982176447489, 'loss': 0.019453473115642834, 'time_step': 0.0036847232336021333, 'init_value': -3.6865503787994385, 'ave_value': -2.4917638183109934, 'soft_opc': nan} step=6640




2022-04-20 18:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.42 [info     ] FQE_20220420185005: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016296053507241858, 'time_algorithm_update': 0.003422844840819577, 'loss': 0.020119547657388073, 'time_step': 0.0036579298685832195, 'init_value': -3.71292781829834, 'ave_value': -2.5441531355853555, 'soft_opc': nan} step=6806




2022-04-20 18:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.43 [info     ] FQE_20220420185005: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001673597887337926, 'time_algorithm_update': 0.003534815397607275, 'loss': 0.02008372406595869, 'time_step': 0.003777193735881024, 'init_value': -3.7707772254943848, 'ave_value': -2.5513547936269827, 'soft_opc': nan} step=6972




2022-04-20 18:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.44 [info     ] FQE_20220420185005: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016242050262818853, 'time_algorithm_update': 0.0035316283444324172, 'loss': 0.02134138959633882, 'time_step': 0.003768307616911739, 'init_value': -3.928903341293335, 'ave_value': -2.672134368430387, 'soft_opc': nan} step=7138




2022-04-20 18:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.44 [info     ] FQE_20220420185005: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016387112169380648, 'time_algorithm_update': 0.0035106805433709936, 'loss': 0.021945941116077353, 'time_step': 0.003750154771000506, 'init_value': -3.988307237625122, 'ave_value': -2.7378935690063075, 'soft_opc': nan} step=7304




2022-04-20 18:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.45 [info     ] FQE_20220420185005: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016536482845444278, 'time_algorithm_update': 0.0036279192890029356, 'loss': 0.02286787251857704, 'time_step': 0.003865619739854192, 'init_value': -3.9995663166046143, 'ave_value': -2.690287965556254, 'soft_opc': nan} step=7470




2022-04-20 18:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.46 [info     ] FQE_20220420185005: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016406214380838783, 'time_algorithm_update': 0.003695483667304717, 'loss': 0.022601056554360337, 'time_step': 0.003933459879404091, 'init_value': -4.079512596130371, 'ave_value': -2.740557242802403, 'soft_opc': nan} step=7636




2022-04-20 18:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.46 [info     ] FQE_20220420185005: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001669719994786274, 'time_algorithm_update': 0.0035153684845889905, 'loss': 0.024533471281931972, 'time_step': 0.0037577812930187546, 'init_value': -4.130899429321289, 'ave_value': -2.7703608640813613, 'soft_opc': nan} step=7802




2022-04-20 18:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.47 [info     ] FQE_20220420185005: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016493538776075984, 'time_algorithm_update': 0.003558680235621441, 'loss': 0.025254948683930224, 'time_step': 0.003792804407786174, 'init_value': -4.227170944213867, 'ave_value': -2.867980514590939, 'soft_opc': nan} step=7968




2022-04-20 18:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.48 [info     ] FQE_20220420185005: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016551850790000823, 'time_algorithm_update': 0.003616189382162439, 'loss': 0.02565427683644468, 'time_step': 0.0038578409746468784, 'init_value': -4.263649940490723, 'ave_value': -2.8598380403643526, 'soft_opc': nan} step=8134




2022-04-20 18:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.49 [info     ] FQE_20220420185005: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016514795372285038, 'time_algorithm_update': 0.0035144478441720985, 'loss': 0.025334836910226304, 'time_step': 0.0037537253046610267, 'init_value': -4.3191237449646, 'ave_value': -2.9316485477963816, 'soft_opc': nan} step=8300




2022-04-20 18:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185005/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.9573146

2022-04-20 18:50.49 [info     ] Directory is created at d3rlpy_logs/FQE_20220420185049
2022-04-20 18:50.49 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:50.49 [debug    ] Building models...
2022-04-20 18:50.49 [debug    ] Models have been built.
2022-04-20 18:50.49 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420185049/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:50.50 [info     ] FQE_20220420185049: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016577437866565793, 'time_algorithm_update': 0.003516213145366935, 'loss': 0.025976567345043253, 'time_step': 0.003753246263016102, 'init_value': -1.3801497220993042, 'ave_value': -1.357331733372029, 'soft_opc': nan} step=344




2022-04-20 18:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.52 [info     ] FQE_20220420185049: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001643334710320761, 'time_algorithm_update': 0.0036024201747983002, 'loss': 0.02376839887827288, 'time_step': 0.003842776597932328, 'init_value': -2.1880836486816406, 'ave_value': -2.1210400356097265, 'soft_opc': nan} step=688




2022-04-20 18:50.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.53 [info     ] FQE_20220420185049: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016419346942458043, 'time_algorithm_update': 0.0034583647583806237, 'loss': 0.02729948515039977, 'time_step': 0.0036947879680367403, 'init_value': -3.472621440887451, 'ave_value': -3.34190683217199, 'soft_opc': nan} step=1032




2022-04-20 18:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.55 [info     ] FQE_20220420185049: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001676900442256484, 'time_algorithm_update': 0.0035230822341386662, 'loss': 0.03089404263706921, 'time_step': 0.0037653155105058537, 'init_value': -4.323354721069336, 'ave_value': -4.078540793839876, 'soft_opc': nan} step=1376




2022-04-20 18:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.56 [info     ] FQE_20220420185049: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001661832942519077, 'time_algorithm_update': 0.003554611705070318, 'loss': 0.039198041966639806, 'time_step': 0.003796438838160315, 'init_value': -5.623519420623779, 'ave_value': -5.253189487365989, 'soft_opc': nan} step=1720




2022-04-20 18:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.57 [info     ] FQE_20220420185049: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017049908638000488, 'time_algorithm_update': 0.003558242736860763, 'loss': 0.04834785613469606, 'time_step': 0.003803686347118644, 'init_value': -6.512433052062988, 'ave_value': -6.0434131001298494, 'soft_opc': nan} step=2064




2022-04-20 18:50.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:50.59 [info     ] FQE_20220420185049: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001669401346251022, 'time_algorithm_update': 0.003514386193696843, 'loss': 0.0607843971323915, 'time_step': 0.0037547835083894953, 'init_value': -7.613819599151611, 'ave_value': -7.052077724133526, 'soft_opc': nan} step=2408




2022-04-20 18:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.00 [info     ] FQE_20220420185049: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016836925994518192, 'time_algorithm_update': 0.0035165569116902906, 'loss': 0.07848151442919706, 'time_step': 0.003759153360544249, 'init_value': -8.451390266418457, 'ave_value': -7.820822331121376, 'soft_opc': nan} step=2752




2022-04-20 18:51.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.02 [info     ] FQE_20220420185049: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016915035802264546, 'time_algorithm_update': 0.0034767430882121243, 'loss': 0.0977050325492137, 'time_step': 0.0037213834219200666, 'init_value': -9.237689971923828, 'ave_value': -8.659622756237383, 'soft_opc': nan} step=3096




2022-04-20 18:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.03 [info     ] FQE_20220420185049: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017000353613565134, 'time_algorithm_update': 0.003588556550269903, 'loss': 0.11893400951927571, 'time_step': 0.003828903270322223, 'init_value': -10.306211471557617, 'ave_value': -9.740300777366569, 'soft_opc': nan} step=3440




2022-04-20 18:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.04 [info     ] FQE_20220420185049: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016987323760986328, 'time_algorithm_update': 0.0034952551819557366, 'loss': 0.14058696671096652, 'time_step': 0.0037421293036882268, 'init_value': -11.048894882202148, 'ave_value': -10.611797170434986, 'soft_opc': nan} step=3784




2022-04-20 18:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.06 [info     ] FQE_20220420185049: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016487753668496775, 'time_algorithm_update': 0.003508869298668795, 'loss': 0.16724609203506693, 'time_step': 0.003748363533685374, 'init_value': -12.0107421875, 'ave_value': -11.650864645489701, 'soft_opc': nan} step=4128




2022-04-20 18:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.07 [info     ] FQE_20220420185049: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001702301724012508, 'time_algorithm_update': 0.003505816986394483, 'loss': 0.18946597994261877, 'time_step': 0.003753564385480659, 'init_value': -12.487682342529297, 'ave_value': -12.282018083414515, 'soft_opc': nan} step=4472




2022-04-20 18:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.09 [info     ] FQE_20220420185049: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016944145047387412, 'time_algorithm_update': 0.003531871146933977, 'loss': 0.21043592041120107, 'time_step': 0.003776241180508636, 'init_value': -13.320028305053711, 'ave_value': -13.104907571141784, 'soft_opc': nan} step=4816




2022-04-20 18:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.10 [info     ] FQE_20220420185049: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016776558964751487, 'time_algorithm_update': 0.0034791577694027924, 'loss': 0.2376086461346943, 'time_step': 0.003720711830050446, 'init_value': -14.032585144042969, 'ave_value': -13.951254433205536, 'soft_opc': nan} step=5160




2022-04-20 18:51.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.12 [info     ] FQE_20220420185049: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017136335372924805, 'time_algorithm_update': 0.0035471195398375046, 'loss': 0.25213494118259744, 'time_step': 0.0037922845330349234, 'init_value': -14.439661979675293, 'ave_value': -14.533218584377487, 'soft_opc': nan} step=5504




2022-04-20 18:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.13 [info     ] FQE_20220420185049: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001659074495005053, 'time_algorithm_update': 0.003108226975729299, 'loss': 0.27693264610350654, 'time_step': 0.0033491115237391273, 'init_value': -15.012849807739258, 'ave_value': -15.089527717756258, 'soft_opc': nan} step=5848




2022-04-20 18:51.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.14 [info     ] FQE_20220420185049: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016773786655692168, 'time_algorithm_update': 0.0035316992637722993, 'loss': 0.29347492353774085, 'time_step': 0.0037749395814052848, 'init_value': -15.530588150024414, 'ave_value': -15.705558774143725, 'soft_opc': nan} step=6192




2022-04-20 18:51.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.16 [info     ] FQE_20220420185049: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001670870670052462, 'time_algorithm_update': 0.003554347642632418, 'loss': 0.3157016135708884, 'time_step': 0.003797701624936836, 'init_value': -16.308425903320312, 'ave_value': -16.497742443001485, 'soft_opc': nan} step=6536




2022-04-20 18:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.17 [info     ] FQE_20220420185049: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016893134560695913, 'time_algorithm_update': 0.003559251164281091, 'loss': 0.3265023290905235, 'time_step': 0.00380482507306476, 'init_value': -16.510656356811523, 'ave_value': -16.65520826979532, 'soft_opc': nan} step=6880




2022-04-20 18:51.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.18 [info     ] FQE_20220420185049: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001710694889689601, 'time_algorithm_update': 0.0035997871742692103, 'loss': 0.33741644800229126, 'time_step': 0.003845659106276756, 'init_value': -17.1398868560791, 'ave_value': -17.163292916281872, 'soft_opc': nan} step=7224




2022-04-20 18:51.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.20 [info     ] FQE_20220420185049: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016957105592239733, 'time_algorithm_update': 0.0035961277263109074, 'loss': 0.3498988244953284, 'time_step': 0.003840052804281545, 'init_value': -17.492063522338867, 'ave_value': -17.62522719744425, 'soft_opc': nan} step=7568




2022-04-20 18:51.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.21 [info     ] FQE_20220420185049: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016964452211246935, 'time_algorithm_update': 0.003499774738799694, 'loss': 0.3643802853117069, 'time_step': 0.0037473280762517175, 'init_value': -18.056856155395508, 'ave_value': -18.186647631900872, 'soft_opc': nan} step=7912




2022-04-20 18:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.23 [info     ] FQE_20220420185049: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016768935114838356, 'time_algorithm_update': 0.003614937843278397, 'loss': 0.37636631186834946, 'time_step': 0.003859445106151492, 'init_value': -18.184532165527344, 'ave_value': -18.335783026618714, 'soft_opc': nan} step=8256




2022-04-20 18:51.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.24 [info     ] FQE_20220420185049: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017046443251676337, 'time_algorithm_update': 0.0035722054714380307, 'loss': 0.38702948689244165, 'time_step': 0.003819176623987597, 'init_value': -18.72006607055664, 'ave_value': -18.920806555067365, 'soft_opc': nan} step=8600




2022-04-20 18:51.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.26 [info     ] FQE_20220420185049: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016924045806707337, 'time_algorithm_update': 0.0035570548024288443, 'loss': 0.4021551185125095, 'time_step': 0.0038017297900000283, 'init_value': -19.161849975585938, 'ave_value': -19.50154651734424, 'soft_opc': nan} step=8944




2022-04-20 18:51.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.27 [info     ] FQE_20220420185049: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017242999963982162, 'time_algorithm_update': 0.003609924122344616, 'loss': 0.41096749023512696, 'time_step': 0.0038569881472476693, 'init_value': -19.42509651184082, 'ave_value': -19.833560926833837, 'soft_opc': nan} step=9288




2022-04-20 18:51.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.28 [info     ] FQE_20220420185049: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016954056052274482, 'time_algorithm_update': 0.003480162038359531, 'loss': 0.4233098618339661, 'time_step': 0.003720856683198796, 'init_value': -19.81032371520996, 'ave_value': -20.354026739623162, 'soft_opc': nan} step=9632




2022-04-20 18:51.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.30 [info     ] FQE_20220420185049: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017319169155386992, 'time_algorithm_update': 0.003609506889831188, 'loss': 0.442342734207984, 'time_step': 0.0038596558016400005, 'init_value': -20.14975357055664, 'ave_value': -20.77202436387967, 'soft_opc': nan} step=9976




2022-04-20 18:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.31 [info     ] FQE_20220420185049: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016847114230311193, 'time_algorithm_update': 0.0035964860472568246, 'loss': 0.45657734746156736, 'time_step': 0.0038406606330428014, 'init_value': -20.581087112426758, 'ave_value': -21.363196249611608, 'soft_opc': nan} step=10320




2022-04-20 18:51.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.33 [info     ] FQE_20220420185049: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017486685930296432, 'time_algorithm_update': 0.0036642163298850837, 'loss': 0.4698207394739743, 'time_step': 0.0039142252400864, 'init_value': -20.515344619750977, 'ave_value': -21.357319948046996, 'soft_opc': nan} step=10664




2022-04-20 18:51.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.34 [info     ] FQE_20220420185049: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016947055971899697, 'time_algorithm_update': 0.0035785187122433686, 'loss': 0.4838914279771839, 'time_step': 0.0038241854933805243, 'init_value': -20.754302978515625, 'ave_value': -21.555567695284346, 'soft_opc': nan} step=11008




2022-04-20 18:51.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.36 [info     ] FQE_20220420185049: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000168854414030563, 'time_algorithm_update': 0.0035602783047875694, 'loss': 0.49551989500909005, 'time_step': 0.0038056013196013693, 'init_value': -20.945838928222656, 'ave_value': -21.9200261091675, 'soft_opc': nan} step=11352




2022-04-20 18:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.37 [info     ] FQE_20220420185049: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001688703548076541, 'time_algorithm_update': 0.0035843932351400684, 'loss': 0.4989103675159344, 'time_step': 0.003828562276307927, 'init_value': -21.020740509033203, 'ave_value': -22.089175792354755, 'soft_opc': nan} step=11696




2022-04-20 18:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.39 [info     ] FQE_20220420185049: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016981086065602857, 'time_algorithm_update': 0.003583803426387698, 'loss': 0.505122076969074, 'time_step': 0.0038301223932310593, 'init_value': -20.855562210083008, 'ave_value': -22.156349115520882, 'soft_opc': nan} step=12040




2022-04-20 18:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.40 [info     ] FQE_20220420185049: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016915659571802892, 'time_algorithm_update': 0.003543560588082602, 'loss': 0.5077623556054003, 'time_step': 0.003789442223171855, 'init_value': -21.287639617919922, 'ave_value': -22.595502077174366, 'soft_opc': nan} step=12384




2022-04-20 18:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.41 [info     ] FQE_20220420185049: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016901659411053325, 'time_algorithm_update': 0.0035273689170216404, 'loss': 0.5228675731652698, 'time_step': 0.0037721936092820277, 'init_value': -21.972007751464844, 'ave_value': -23.358665002298515, 'soft_opc': nan} step=12728




2022-04-20 18:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.43 [info     ] FQE_20220420185049: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017003888307615768, 'time_algorithm_update': 0.0035299845906191095, 'loss': 0.5308976477813409, 'time_step': 0.003775162059207295, 'init_value': -21.686134338378906, 'ave_value': -23.222681185792762, 'soft_opc': nan} step=13072




2022-04-20 18:51.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.44 [info     ] FQE_20220420185049: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017029532166414483, 'time_algorithm_update': 0.0035134623217028243, 'loss': 0.5498608790857847, 'time_step': 0.0037595768307530602, 'init_value': -22.07550811767578, 'ave_value': -23.61230801108654, 'soft_opc': nan} step=13416




2022-04-20 18:51.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.46 [info     ] FQE_20220420185049: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017136058142018873, 'time_algorithm_update': 0.0035468138927637143, 'loss': 0.5610015623726298, 'time_step': 0.003793968017711196, 'init_value': -22.1976318359375, 'ave_value': -23.864487342637133, 'soft_opc': nan} step=13760




2022-04-20 18:51.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.47 [info     ] FQE_20220420185049: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017165860464406568, 'time_algorithm_update': 0.0035728465679079986, 'loss': 0.5720564735739297, 'time_step': 0.003820792880169181, 'init_value': -22.54485321044922, 'ave_value': -24.215719358988846, 'soft_opc': nan} step=14104




2022-04-20 18:51.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.49 [info     ] FQE_20220420185049: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001695696697678677, 'time_algorithm_update': 0.003537839928338694, 'loss': 0.5914272068631511, 'time_step': 0.0037822570911673612, 'init_value': -23.04839324951172, 'ave_value': -24.59827377905876, 'soft_opc': nan} step=14448




2022-04-20 18:51.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.50 [info     ] FQE_20220420185049: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001720709856166396, 'time_algorithm_update': 0.0036126846490904343, 'loss': 0.6105197878272901, 'time_step': 0.0038606268028880276, 'init_value': -23.159488677978516, 'ave_value': -24.912494792325237, 'soft_opc': nan} step=14792




2022-04-20 18:51.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.52 [info     ] FQE_20220420185049: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001708629519440407, 'time_algorithm_update': 0.003571747347365978, 'loss': 0.6380379142586229, 'time_step': 0.00382126625194106, 'init_value': -23.182239532470703, 'ave_value': -25.017357677324917, 'soft_opc': nan} step=15136




2022-04-20 18:51.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.53 [info     ] FQE_20220420185049: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017513230789539426, 'time_algorithm_update': 0.0035621225833892822, 'loss': 0.6482265721356799, 'time_step': 0.0038156544053277305, 'init_value': -23.52800178527832, 'ave_value': -25.39950796944668, 'soft_opc': nan} step=15480




2022-04-20 18:51.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.54 [info     ] FQE_20220420185049: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016889877097551212, 'time_algorithm_update': 0.0035093690073767373, 'loss': 0.662663369038857, 'time_step': 0.003754216571186864, 'init_value': -23.81524658203125, 'ave_value': -25.916768225020235, 'soft_opc': nan} step=15824




2022-04-20 18:51.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.56 [info     ] FQE_20220420185049: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017736055130182312, 'time_algorithm_update': 0.003577865140382634, 'loss': 0.6837935199376283, 'time_step': 0.0038314634977385056, 'init_value': -23.959789276123047, 'ave_value': -26.158814098014755, 'soft_opc': nan} step=16168




2022-04-20 18:51.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.57 [info     ] FQE_20220420185049: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016946778740993765, 'time_algorithm_update': 0.0035906808320866073, 'loss': 0.7041917264970496, 'time_step': 0.003836051669231681, 'init_value': -24.120563507080078, 'ave_value': -26.47232819745938, 'soft_opc': nan} step=16512




2022-04-20 18:51.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.59 [info     ] FQE_20220420185049: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016964937365332314, 'time_algorithm_update': 0.0035656711389852125, 'loss': 0.7224260081515409, 'time_step': 0.0038099406763564708, 'init_value': -24.327266693115234, 'ave_value': -26.87019950480427, 'soft_opc': nan} step=16856




2022-04-20 18:51.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.00 [info     ] FQE_20220420185049: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017110414283220157, 'time_algorithm_update': 0.003558977398761483, 'loss': 0.7392019134361384, 'time_step': 0.0038058494412621787, 'init_value': -24.275476455688477, 'ave_value': -26.837951458895343, 'soft_opc': nan} step=17200




2022-04-20 18:52.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185049/model_17200.pt


## Reading hyperparameters from file

In [13]:
# Reload the hyperparameters that were previously pickled to disk.
# NOTE(review): `pkl` is not imported in this cell; presumably `import pickle as pkl`
# ran in an earlier cell — confirm so the notebook survives Restart & Run All.
# Unpickling can execute arbitrary code, so only open files this project produced.
with open("hyperparams_td3bc.pkl", mode="rb") as f:
    data = pkl.load(f)

# Print the restored values so they are recorded in the cell output.
print(data)

[0.0012009373312255548, 0.007673227118548031, 7.942796560357695e-05, 1]
