# Hyperparameter search for CQL

In [10]:
!pip install d3rlpy



In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# plt.style.use('matplotlibrc')

# from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. As `d3rlpy` wants it in the form (observations, actions, rewards, terminal flags), we go ahead and do that. Here's a helper function to get a dataset from a list of chunks of your choosing.

In [12]:
import numpy as np
import torch
import random
import pandas
from Python.data_sampler import *

In [13]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt",
                episode_length=1111) -> tuple:
    """Build a d3rlpy MDPDataset from the selected chunks of a data file.

    For every chunk index in `chunks` the chunk is read through `DataSampler`
    (in "coarse" setting), a batch of up to `batch_size` samples is drawn from
    it, and the per-chunk batches are concatenated in the given order.

    Args:
        chunks: chunk indices to read, in order. Must not be empty.
        batch_size: number of samples requested from each chunk.
        path: path of the data file handed to `DataSampler`.
        episode_length: number of consecutive samples that form one episode.
            The last sample of every episode is flagged as terminal.

    Returns:
        A tuple `(dataset, states, actions, rewards)` where `dataset` is the
        `d3rlpy.dataset.MDPDataset` and the other three entries are the numpy
        arrays it was built from.

    Raises:
        ValueError: if `chunks` is empty or `episode_length` is not positive.

    Side effects:
        Resets the seed of Python's global `random` module to 0 and prints the
        shape of the concatenated state tensor.
    """
    if len(chunks) == 0:
        raise ValueError("get_dataset needs at least one chunk index")
    if episode_length < 1:
        raise ValueError("episode_length must be a positive integer")

    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")

    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch also returns the next states; MDPDataset does not take
        # them, so they are discarded here.
        statesChunk, actionsChunk, rewardsChunk, _ = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)

    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)

    # Flag the LAST sample of each episode (indices episode_length - 1,
    # 2 * episode_length - 1, ...). Starting the stride at index 0 would turn
    # the very first sample into a one-step episode and shift every following
    # episode boundary by one sample.
    terminals = np.zeros(len(states))
    terminals[episode_length - 1::episode_length] = 1

    print(states.shape)
    states_np = states.numpy()
    actions_np = actions.numpy()
    rewards_np = rewards.numpy()
    dataset = d3rlpy.dataset.MDPDataset(states_np, actions_np, rewards_np, terminals)
    return dataset, states_np, actions_np, rewards_np

We can build the dataset from there, just like this, and split into train and test sets.

In [14]:
# Chunks 200..299 of the stochastic-PID data file make up the training dataset.
dataset, states, actions, rewards = get_dataset(
    list(range(200, 300)),
    path="../collected_data/rl_stochpid.txt",
)

start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.02089108e-01  1.38000047e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.

[ 0.00000000e+00  7.95731469e+08 -2.86689108e-01  3.36000047e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.83419307e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 247 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.29489108e-01  1.24000047e-02
  1.97999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.73149828e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 248 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.92108923e-02 -1.15999953e-02
 -2.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.12672481e-02  1.26197128e-01 -4.51488887e-01]
Read chunk # 249 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.47108923e-02  5.20000047e-02
  1.82999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.92298179e-01  4.75144083e-03 -9.25660004e-02]
Read chunk # 250 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.79989108e-01  2.20000469e-03
 -1.09000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.569924

[ 0.00000000e+00  7.95731469e+08 -3.04389108e-01  5.24000047e-02
 -2.83000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 293 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.79889108e-01 -2.13999953e-02
 -8.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.48109576e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 294 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.24108923e-02 -5.33999953e-02
  6.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.81836613e-01  3.10953755e-01 -6.00000000e-01]
Read chunk # 295 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.41289108e-01  3.60000047e-02
 -1.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.27789575e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 296 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.38689108e-01 -5.93999953e-02
 -1.91000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.347818

In [15]:
# Show the 'return' entry of the dataset statistics (mean / std / min / max /
# histogram). The bare last expression is what makes the notebook display it.
print("The behavior policy value statistics are:")
dataset.compute_stats()['return']

The behavior policy value statistics are:


{'mean': -116.6173,
 'std': 91.33919,
 'min': -391.5658,
 'max': 0.0,
 'histogram': (array([ 3,  2,  2,  1,  0,  2,  0,  3,  1,  2,  3,  3,  3,  5,  7, 16, 22,
         21,  3,  1]),
  array([-391.5658  , -371.98752 , -352.4092  , -332.83093 , -313.25262 ,
         -293.67435 , -274.09607 , -254.51776 , -234.93948 , -215.36119 ,
         -195.7829  , -176.2046  , -156.62631 , -137.04803 , -117.46974 ,
          -97.89145 ,  -78.313156,  -58.73487 ,  -39.156578,  -19.578289,
            0.      ], dtype=float32))}

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the dataset's entries for evaluation. The fixed random_state
# makes the partition identical on every "Restart & Run All".
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, random_state=0)

## Setting up an Algorithm

In [17]:
from d3rlpy.algos import CQL
from d3rlpy.models.encoders import VectorEncoderFactory

from d3rlpy.preprocessing import MinMaxActionScaler
import random
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer
import pickle as pkl
from statistics import harmonic_mean as hm

## Perform a random search over the hyperparameters

Good performance during the evaluation step in the simulator requires a good FQE (fitted Q evaluation) value estimate on both the stochastic and the deterministic data, so we score each set of hyperparameters by the harmonic mean of the two FQE estimates. The harmonic mean is dominated by the smaller of its inputs, so it is large only when both estimates are large. Note that the harmonic mean is only defined for non-negative numbers; the code below adds a constant offset of 50 to each estimate before taking it.


In [18]:

# Number of random-search iterations. Every iteration samples a fresh set of
# hyper-parameters, trains CQL with it and scores the result with FQE.
num_search_iterations = 40
largest_fqe = -np.inf

# Constant added to each FQE value estimate before the harmonic mean is taken.
# NOTE(review): presumably meant to shift the estimates into the non-negative
# range that statistics.harmonic_mean requires -- confirm the value.
FQE_VALUE_OFFSET = 50

# Request the GPU only when CUDA is available so the search can also run on
# CPU-only machines.
use_gpu = torch.cuda.is_available()

# (chunk indices, data file) of the two datasets each candidate is evaluated
# on; change the chunks here if you'd prefer different ones.
# NOTE(review): the second history is stored as `history_det`, yet it is fitted
# on rl_purestochastic.txt -- confirm this is the intended file.
ope_sources = [
    (list(range(1000, 1020)), "../collected_data/rl_stochpid.txt"),
    (list(range(40)), "../collected_data/rl_purestochastic.txt"),
]

# Custom encoders. They are currently unused because the matching CQL
# arguments below are commented out. They do not depend on the sampled
# hyper-parameters, so they are built once instead of once per iteration.
actor_encoder = VectorEncoderFactory(hidden_units=[12, 24, 36, 24, 12],
                                     activation='relu', use_batch_norm=True, dropout_rate=0.2)
critic_encoder = VectorEncoderFactory(hidden_units=[12, 24, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)

for i in range(num_search_iterations):

    # Re-seed from system entropy: get_dataset() resets the `random` seed to 0,
    # which would otherwise make every iteration draw the same hyper-parameters.
    random.seed()

    actor_lr_this_iter = random.uniform(1e-5, 1e-2)
    critic_lr_this_iter = random.uniform(1e-5, 1e-2)
    temp_lr_this_iter = random.uniform(1e-5, 1e-4)
    n_steps_this_iter = random.choice([1, 3, 5, 7])

    print("search iteration: ", i)
    print("using hyper params: ", [actor_lr_this_iter, critic_lr_this_iter, 
                                   temp_lr_this_iter, n_steps_this_iter])

    model = CQL(q_func_factory='qr', #qr -> quantile regression q function, but you don't have to use this
                reward_scaler='standard',
#                 actor_encoder_factory = actor_encoder,
#                 critic_encoder_factory = critic_encoder,
                action_scaler=action_scaler,
                actor_learning_rate=actor_lr_this_iter, 
                critic_learning_rate=critic_lr_this_iter,
                temp_learning_rate=temp_lr_this_iter,
                n_steps=n_steps_this_iter, 
                use_gpu=use_gpu)
    model.build_with_dataset(dataset)

    model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=50, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })

    # Off-policy evaluation: fit a fresh FQE estimator for the trained policy
    # on each evaluation dataset, in the order listed in `ope_sources`.
    ope_histories = []
    for ope_chunks, ope_path in ope_sources:
        ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset(ope_chunks, path=ope_path)
        ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

        fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=use_gpu)
        ope_histories.append(fqe.fit(ope_train_episodes,
            eval_episodes=ope_test_episodes,
            tensorboard_dir='runs',
            n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
            scorers={
               'init_value': initial_state_value_estimation_scorer,
                'ave_value': average_value_estimation_scorer,
               'soft_opc': soft_opc_scorer(return_threshold=0)
            }))
    history_stoch, history_det = ope_histories

    # The score uses the "ave_value" metric stored in the second element of the
    # last entry of each FQE history.
    shifted_values = [history_stoch[-1][1]["ave_value"] + FQE_VALUE_OFFSET,
                      history_det[-1][1]["ave_value"] + FQE_VALUE_OFFSET]

    # statistics.harmonic_mean raises StatisticsError for negative inputs.
    # Such a candidate cannot be ranked with this score, so report and skip it
    # instead of aborting the whole search.
    if min(shifted_values) < 0:
        print("skipping candidate, shifted FQE values are negative: ", shifted_values)
        continue

    fqe_score = hm(shifted_values)
    if fqe_score > largest_fqe:
        largest_fqe = fqe_score

        # Save the hyper-params
        hyperparams = [actor_lr_this_iter, critic_lr_this_iter, 
                       temp_lr_this_iter, n_steps_this_iter]

        with open("hyperparams_cql.pkl", "wb") as f:
            print("most optimal hyper params for cql at this point: ", hyperparams)
            pkl.dump(hyperparams, f)

        # Save model and policy
        model.save_model("model_hyperparams_cql.pt")
        model.save_policy("policy_hyperparams_cql.pt")


search iteration:  0
using hyper params:  [0.0006716406409940611, 0.002206817187940385, 3.919949661777032e-05, 3]
2022-04-20 15:25.34 [debug    ] RoundIterator is selected.
2022-04-20 15:25.34 [info     ] Directory is created at d3rlpy_logs/CQL_20220420152534
2022-04-20 15:25.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:25.34 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:25.34 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420152534/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0006716406409940611, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Ada

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.38 [info     ] CQL_20220420152534: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00022835341113352635, 'time_algorithm_update': 0.012206148682979116, 'temp_loss': 4.719915053997821, 'temp': 0.99341635048738, 'alpha_loss': -16.46600731911018, 'alpha': 1.0170674822483843, 'critic_loss': 35.16187782176057, 'actor_loss': 0.8886321296830993, 'time_step': 0.012485552949515002, 'td_error': 2.3450674034916914, 'init_value': -3.6558213233947754, 'ave_value': -2.34637335012927} step=342
2022-04-20 15:25.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.43 [info     ] CQL_20220420152534: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00025492802000882334, 'time_algorithm_update': 0.01094192649885925, 'temp_loss': 4.491180210782771, 'temp': 0.9805451138326299, 'alpha_loss': -11.177298913922225, 'alpha': 1.047189322131419, 'critic_loss': 21.19385628393519, 'actor_loss': 3.37152078590895, 'time_step': 0.011250973444933083, 'td_error': 3.958677795854501, 'init_value': -9.645764350891113, 'ave_value': -5.0735166328564825} step=684
2022-04-20 15:25.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.47 [info     ] CQL_20220420152534: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0002454212534497356, 'time_algorithm_update': 0.011398067948413871, 'temp_loss': 4.02732241014291, 'temp': 0.9682711418609173, 'alpha_loss': -7.798467026816474, 'alpha': 1.0719105768622013, 'critic_loss': 22.24011992850499, 'actor_loss': 6.860156124098259, 'time_step': 0.011702579364441988, 'td_error': 4.235382918407772, 'init_value': -14.881948471069336, 'ave_value': -7.7707334369602235} step=1026
2022-04-20 15:25.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.52 [info     ] CQL_20220420152534: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0002723981065359729, 'time_algorithm_update': 0.013050103745265313, 'temp_loss': 3.5936402932942264, 'temp': 0.9569177047202462, 'alpha_loss': -5.7518017410534865, 'alpha': 1.0938911640156082, 'critic_loss': 27.572729852464462, 'actor_loss': 10.206290051253916, 'time_step': 0.013386195863199514, 'td_error': 5.344408738900762, 'init_value': -20.17633628845215, 'ave_value': -10.425250122486739} step=1368
2022-04-20 15:25.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:25.59 [info     ] CQL_20220420152534: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0002581229683948539, 'time_algorithm_update': 0.020195283387836657, 'temp_loss': 3.252244761115626, 'temp': 0.9461629690482602, 'alpha_loss': -4.009478575304935, 'alpha': 1.1131148944821274, 'critic_loss': 34.83447157988074, 'actor_loss': 13.50674233241388, 'time_step': 0.0205120357156497, 'td_error': 7.064723337120905, 'init_value': -25.064138412475586, 'ave_value': -12.807765586103674} step=1710
2022-04-20 15:25.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.06 [info     ] CQL_20220420152534: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0002639384297599569, 'time_algorithm_update': 0.019641347098768802, 'temp_loss': 2.9382096103757447, 'temp': 0.9358477008621595, 'alpha_loss': -2.29910995422356, 'alpha': 1.1282290032035427, 'critic_loss': 44.5739051082678, 'actor_loss': 16.76340614006533, 'time_step': 0.019962949362414623, 'td_error': 8.663104055184613, 'init_value': -29.898900985717773, 'ave_value': -15.434326020458395} step=2052
2022-04-20 15:26.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.13 [info     ] CQL_20220420152534: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0002804883042274163, 'time_algorithm_update': 0.019648137845491107, 'temp_loss': 2.663342130811591, 'temp': 0.9259418810320179, 'alpha_loss': -0.6681488585366993, 'alpha': 1.1365267655305695, 'critic_loss': 56.47402820252535, 'actor_loss': 19.91304051527503, 'time_step': 0.01998743536876656, 'td_error': 10.917354571101864, 'init_value': -35.05663299560547, 'ave_value': -17.53155131658925} step=2394
2022-04-20 15:26.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.20 [info     ] CQL_20220420152534: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00028024849138761824, 'time_algorithm_update': 0.01981164628302145, 'temp_loss': 2.398551197079887, 'temp': 0.9163824971656354, 'alpha_loss': 0.7956117443108593, 'alpha': 1.136082869175582, 'critic_loss': 69.87127030802051, 'actor_loss': 22.991669428975957, 'time_step': 0.020156124181914748, 'td_error': 13.524874444578305, 'init_value': -39.67713165283203, 'ave_value': -19.860270256514216} step=2736
2022-04-20 15:26.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.28 [info     ] CQL_20220420152534: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0002774048966971057, 'time_algorithm_update': 0.020089269381517557, 'temp_loss': 2.169514064203229, 'temp': 0.9071817007678294, 'alpha_loss': 2.085253638580267, 'alpha': 1.1239562093863014, 'critic_loss': 83.19497283578616, 'actor_loss': 25.8721430984854, 'time_step': 0.02043065690157706, 'td_error': 16.11082288551461, 'init_value': -45.57305908203125, 'ave_value': -23.154276122766305} step=3078
2022-04-20 15:26.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.35 [info     ] CQL_20220420152534: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0002596294670774226, 'time_algorithm_update': 0.019885519094634475, 'temp_loss': 1.9769406283808033, 'temp': 0.8982097358382933, 'alpha_loss': 3.155766461676324, 'alpha': 1.099697188675752, 'critic_loss': 97.48843243247585, 'actor_loss': 28.701012237727294, 'time_step': 0.020202327192875378, 'td_error': 18.337887488065128, 'init_value': -49.909019470214844, 'ave_value': -25.179999156834576} step=3420
2022-04-20 15:26.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.42 [info     ] CQL_20220420152534: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00025189271447254204, 'time_algorithm_update': 0.019245840652644286, 'temp_loss': 1.8032231731721533, 'temp': 0.8894269783594455, 'alpha_loss': 4.078283559857753, 'alpha': 1.0659689861431456, 'critic_loss': 112.7532066880611, 'actor_loss': 31.447240695618746, 'time_step': 0.019555447394387762, 'td_error': 20.638474400599705, 'init_value': -53.547889709472656, 'ave_value': -26.764039897733443} step=3762
2022-04-20 15:26.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.49 [info     ] CQL_20220420152534: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0002585175441719635, 'time_algorithm_update': 0.01982579593769988, 'temp_loss': 1.6453747613388194, 'temp': 0.8807961370861321, 'alpha_loss': 4.825385728426147, 'alpha': 1.0267228086789448, 'critic_loss': 128.69480811225043, 'actor_loss': 34.15708873704163, 'time_step': 0.020139695608127884, 'td_error': 24.157471200982005, 'init_value': -58.859092712402344, 'ave_value': -29.957725555147675} step=4104
2022-04-20 15:26.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:26.56 [info     ] CQL_20220420152534: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00026177105150724714, 'time_algorithm_update': 0.019641411234760844, 'temp_loss': 1.5062071084976196, 'temp': 0.8723222925300487, 'alpha_loss': 5.401596190636618, 'alpha': 0.9861496960559086, 'critic_loss': 146.3911775287829, 'actor_loss': 36.80183727420562, 'time_step': 0.019962473919517117, 'td_error': 26.58597807895393, 'init_value': -63.13325119018555, 'ave_value': -31.944088277708033} step=4446
2022-04-20 15:26.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.04 [info     ] CQL_20220420152534: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00025059674915514494, 'time_algorithm_update': 0.019420701857895878, 'temp_loss': 1.3819674671741955, 'temp': 0.8639660282441747, 'alpha_loss': 5.8686803028597465, 'alpha': 0.9464151948167566, 'critic_loss': 163.66076811851815, 'actor_loss': 39.39913812157703, 'time_step': 0.019726986076399597, 'td_error': 29.37531234429393, 'init_value': -66.87977600097656, 'ave_value': -34.201611890598194} step=4788
2022-04-20 15:27.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.11 [info     ] CQL_20220420152534: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00025577851903368855, 'time_algorithm_update': 0.019806691080506086, 'temp_loss': 1.253501691142021, 'temp': 0.855755885442098, 'alpha_loss': 6.243327162419146, 'alpha': 0.908824514575869, 'critic_loss': 181.5954580474318, 'actor_loss': 41.79922461370278, 'time_step': 0.020118668762563963, 'td_error': 31.99611743851958, 'init_value': -71.88814544677734, 'ave_value': -36.87443957696895} step=5130
2022-04-20 15:27.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.18 [info     ] CQL_20220420152534: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0002532688497799879, 'time_algorithm_update': 0.019170084194830288, 'temp_loss': 1.1411304513961948, 'temp': 0.8477467793470238, 'alpha_loss': 6.526010582321568, 'alpha': 0.8733997128860295, 'critic_loss': 200.59668499684474, 'actor_loss': 44.26398594058745, 'time_step': 0.019478304344311095, 'td_error': 33.85408122808713, 'init_value': -75.13690948486328, 'ave_value': -38.5880790502625} step=5472
2022-04-20 15:27.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.25 [info     ] CQL_20220420152534: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0002840868910850837, 'time_algorithm_update': 0.020963311195373535, 'temp_loss': 1.0212184962821982, 'temp': 0.839831475800241, 'alpha_loss': 6.767211304073445, 'alpha': 0.8403903352238281, 'critic_loss': 220.0113562422189, 'actor_loss': 46.644080368398924, 'time_step': 0.021310594346788194, 'td_error': 35.86117779606607, 'init_value': -78.60836791992188, 'ave_value': -40.550290707186} step=5814
2022-04-20 15:27.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.33 [info     ] CQL_20220420152534: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00026087105622765613, 'time_algorithm_update': 0.020419047589887652, 'temp_loss': 0.8922278526407933, 'temp': 0.8323353333779943, 'alpha_loss': 7.027656924654866, 'alpha': 0.8085317177730694, 'critic_loss': 239.28866081907037, 'actor_loss': 48.941594051338775, 'time_step': 0.020738690917254887, 'td_error': 38.58693277252147, 'init_value': -82.62318420410156, 'ave_value': -42.97358021654793} step=6156
2022-04-20 15:27.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.40 [info     ] CQL_20220420152534: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0002572111218993427, 'time_algorithm_update': 0.01985492692356221, 'temp_loss': 0.823220905330446, 'temp': 0.8250460814662844, 'alpha_loss': 7.0593096826508726, 'alpha': 0.7788714571305883, 'critic_loss': 259.44379264429995, 'actor_loss': 51.175087912040844, 'time_step': 0.020170163689998157, 'td_error': 41.199968004455066, 'init_value': -87.8731918334961, 'ave_value': -44.825691720223105} step=6498
2022-04-20 15:27.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.47 [info     ] CQL_20220420152534: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00025561887618393925, 'time_algorithm_update': 0.0197996535496405, 'temp_loss': 0.721279628356995, 'temp': 0.8178338669894034, 'alpha_loss': 7.082081853297719, 'alpha': 0.7510082570084354, 'critic_loss': 279.36863971732515, 'actor_loss': 53.30865748444496, 'time_step': 0.020111862678973996, 'td_error': 43.42894795235181, 'init_value': -89.31739807128906, 'ave_value': -46.12863785771636} step=6840
2022-04-20 15:27.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:27.55 [info     ] CQL_20220420152534: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00028631003976565357, 'time_algorithm_update': 0.021144475156103657, 'temp_loss': 0.633731284402093, 'temp': 0.8109776687900923, 'alpha_loss': 7.075762751506783, 'alpha': 0.7249136994108122, 'critic_loss': 299.2438313891316, 'actor_loss': 55.32671971906696, 'time_step': 0.02149863410414311, 'td_error': 45.77108846749552, 'init_value': -94.11866760253906, 'ave_value': -49.08776934645466} step=7182
2022-04-20 15:27.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.02 [info     ] CQL_20220420152534: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0002695900654932212, 'time_algorithm_update': 0.020249448324504652, 'temp_loss': 0.5509103420381135, 'temp': 0.8043513594315066, 'alpha_loss': 7.021861098663152, 'alpha': 0.6999300662537067, 'critic_loss': 318.5572909528052, 'actor_loss': 57.2068935639677, 'time_step': 0.020583048898574204, 'td_error': 48.062172667688756, 'init_value': -98.0148696899414, 'ave_value': -49.903619743558735} step=7524
2022-04-20 15:28.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.10 [info     ] CQL_20220420152534: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00029595902091578434, 'time_algorithm_update': 0.02122845356924492, 'temp_loss': 0.5092544704022115, 'temp': 0.7978181887788383, 'alpha_loss': 6.938228423135323, 'alpha': 0.6762741570584259, 'critic_loss': 336.6133201554505, 'actor_loss': 59.0292131524337, 'time_step': 0.02159400711282652, 'td_error': 49.384384290749836, 'init_value': -100.36763763427734, 'ave_value': -51.902336589978624} step=7866
2022-04-20 15:28.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.18 [info     ] CQL_20220420152534: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00028315273641842846, 'time_algorithm_update': 0.021684537156980637, 'temp_loss': 0.40931684202487356, 'temp': 0.7917869896219488, 'alpha_loss': 6.8872575383437304, 'alpha': 0.6537488477620464, 'critic_loss': 354.06085945709407, 'actor_loss': 60.79660586306923, 'time_step': 0.02204183876863, 'td_error': 51.00997465921226, 'init_value': -102.8238754272461, 'ave_value': -52.73318519773784} step=8208
2022-04-20 15:28.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.26 [info     ] CQL_20220420152534: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0002886942255566692, 'time_algorithm_update': 0.022475138045193858, 'temp_loss': 0.35125786664062425, 'temp': 0.7861147233617236, 'alpha_loss': 6.811028694548802, 'alpha': 0.632101248405133, 'critic_loss': 370.20060462282413, 'actor_loss': 62.433637897870696, 'time_step': 0.022841496077197338, 'td_error': 52.45557750751601, 'init_value': -106.1680679321289, 'ave_value': -54.14831883468338} step=8550
2022-04-20 15:28.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.33 [info     ] CQL_20220420152534: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0002886649460820427, 'time_algorithm_update': 0.02008907627641109, 'temp_loss': 0.2967205644307429, 'temp': 0.7806633627205565, 'alpha_loss': 6.689262634829471, 'alpha': 0.6114308729506376, 'critic_loss': 386.32127674838955, 'actor_loss': 63.92497041490343, 'time_step': 0.020450208619324086, 'td_error': 54.19902054462093, 'init_value': -110.85734558105469, 'ave_value': -56.846837228673536} step=8892
2022-04-20 15:28.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.41 [info     ] CQL_20220420152534: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0002848356090791044, 'time_algorithm_update': 0.019932100647374204, 'temp_loss': 0.2219781149832303, 'temp': 0.7760670859562723, 'alpha_loss': 6.56637736510115, 'alpha': 0.5915463535409224, 'critic_loss': 400.06304423014325, 'actor_loss': 65.33236255980374, 'time_step': 0.020290148885626542, 'td_error': 55.70774384658848, 'init_value': -111.25892639160156, 'ave_value': -56.9507646690591} step=9234
2022-04-20 15:28.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.48 [info     ] CQL_20220420152534: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0002853605482313368, 'time_algorithm_update': 0.01986779246413917, 'temp_loss': 0.2107843423305809, 'temp': 0.7715779328555391, 'alpha_loss': 6.441678003958094, 'alpha': 0.5725159516111452, 'critic_loss': 413.34070734392134, 'actor_loss': 66.64361742923134, 'time_step': 0.020226470908226324, 'td_error': 57.672955733202095, 'init_value': -114.3249282836914, 'ave_value': -57.85258488363511} step=9576
2022-04-20 15:28.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:28.55 [info     ] CQL_20220420152534: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00028367488704926784, 'time_algorithm_update': 0.019225434950220655, 'temp_loss': 0.182654033093686, 'temp': 0.767079746514036, 'alpha_loss': 6.289291137143185, 'alpha': 0.5542164028388018, 'critic_loss': 426.47168629071865, 'actor_loss': 67.94986972474216, 'time_step': 0.01958015857384219, 'td_error': 57.92381968810781, 'init_value': -116.80570983886719, 'ave_value': -58.89122875289874} step=9918
2022-04-20 15:28.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.02 [info     ] CQL_20220420152534: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0002826361628303751, 'time_algorithm_update': 0.019424538863332647, 'temp_loss': 0.11443175849836995, 'temp': 0.763526717821757, 'alpha_loss': 6.159633154060408, 'alpha': 0.5366837035494241, 'critic_loss': 439.0365872745625, 'actor_loss': 69.19050942805775, 'time_step': 0.019781105699594955, 'td_error': 58.99840437082611, 'init_value': -119.70328521728516, 'ave_value': -60.6198974671584} step=10260
2022-04-20 15:29.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.09 [info     ] CQL_20220420152534: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000287971998515882, 'time_algorithm_update': 0.019410859074508936, 'temp_loss': 0.13701840210161362, 'temp': 0.7599676307181866, 'alpha_loss': 6.008271155301591, 'alpha': 0.5197466575262839, 'critic_loss': 450.928200883475, 'actor_loss': 70.4113569984659, 'time_step': 0.01976994045993738, 'td_error': 60.57472739351334, 'init_value': -121.5194320678711, 'ave_value': -61.711782574739544} step=10602
2022-04-20 15:29.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.16 [info     ] CQL_20220420152534: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0002836916181776259, 'time_algorithm_update': 0.019178511106479935, 'temp_loss': 0.0886725961769882, 'temp': 0.7568293843004439, 'alpha_loss': 5.899514507829097, 'alpha': 0.5033543815738276, 'critic_loss': 462.0624993753712, 'actor_loss': 71.51904749730875, 'time_step': 0.019539071802507368, 'td_error': 61.819591028251025, 'init_value': -122.0890884399414, 'ave_value': -61.80746057987079} step=10944
2022-04-20 15:29.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.23 [info     ] CQL_20220420152534: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.000286890749345746, 'time_algorithm_update': 0.019797291672020628, 'temp_loss': 0.05014606446507033, 'temp': 0.7544261314715558, 'alpha_loss': 5.795320417448791, 'alpha': 0.4875535078738865, 'critic_loss': 472.71266352781777, 'actor_loss': 72.61927419517473, 'time_step': 0.020157242378993342, 'td_error': 63.33265098751856, 'init_value': -126.7601089477539, 'ave_value': -63.87090416478144} step=11286
2022-04-20 15:29.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.31 [info     ] CQL_20220420152534: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00028360656827513934, 'time_algorithm_update': 0.019664038691604345, 'temp_loss': 0.0412114144885662, 'temp': 0.7528839974026931, 'alpha_loss': 5.661028263861673, 'alpha': 0.4721589111968091, 'critic_loss': 482.8188088400322, 'actor_loss': 73.60874178256208, 'time_step': 0.020020309944598996, 'td_error': 63.637076252539025, 'init_value': -126.87890625, 'ave_value': -63.63247897836136} step=11628
2022-04-20 15:29.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.38 [info     ] CQL_20220420152534: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00028015995583339045, 'time_algorithm_update': 0.01952362897103293, 'temp_loss': -0.003443354261461754, 'temp': 0.7518052017828177, 'alpha_loss': 5.52632208734925, 'alpha': 0.45740967039127794, 'critic_loss': 491.8528631556104, 'actor_loss': 74.586194545902, 'time_step': 0.0198761064406724, 'td_error': 64.70814663791492, 'init_value': -128.90701293945312, 'ave_value': -64.81876830230962} step=11970
2022-04-20 15:29.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.45 [info     ] CQL_20220420152534: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00028564218889202985, 'time_algorithm_update': 0.01976556596700211, 'temp_loss': -0.0065519364757670295, 'temp': 0.7521673744184929, 'alpha_loss': 5.353440150182847, 'alpha': 0.4432087587682824, 'critic_loss': 500.805522628695, 'actor_loss': 75.368146204809, 'time_step': 0.02012445424732409, 'td_error': 65.62124280717549, 'init_value': -131.67465209960938, 'ave_value': -65.94157663171356} step=12312
2022-04-20 15:29.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.52 [info     ] CQL_20220420152534: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0002826626537836086, 'time_algorithm_update': 0.019544973708035655, 'temp_loss': 0.0053954324494415555, 'temp': 0.7525576276388782, 'alpha_loss': 5.224608790107638, 'alpha': 0.42936662432045963, 'critic_loss': 510.3223740427118, 'actor_loss': 76.24666147064744, 'time_step': 0.019899269293623362, 'td_error': 66.91819987695897, 'init_value': -133.6112823486328, 'ave_value': -66.66088672857967} step=12654
2022-04-20 15:29.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:29.59 [info     ] CQL_20220420152534: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0002806019364741811, 'time_algorithm_update': 0.019360621073092634, 'temp_loss': 0.010807895659855758, 'temp': 0.7519178338218153, 'alpha_loss': 5.103255003516437, 'alpha': 0.4161016560263104, 'critic_loss': 518.9635755750868, 'actor_loss': 77.08785163031683, 'time_step': 0.01971321217497887, 'td_error': 69.57834864335302, 'init_value': -136.82992553710938, 'ave_value': -69.2418964893297} step=12996
2022-04-20 15:29.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.06 [info     ] CQL_20220420152534: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0002834490168164348, 'time_algorithm_update': 0.01934039104751676, 'temp_loss': -0.013250963909453467, 'temp': 0.7520145212697704, 'alpha_loss': 4.909886843976919, 'alpha': 0.40326410305430316, 'critic_loss': 527.9308350323237, 'actor_loss': 77.9798818220172, 'time_step': 0.019696198011699476, 'td_error': 68.14325864592091, 'init_value': -137.49180603027344, 'ave_value': -68.75014729065401} step=13338
2022-04-20 15:30.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.13 [info     ] CQL_20220420152534: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00028769802628901965, 'time_algorithm_update': 0.019560703757213572, 'temp_loss': -0.02371445398952006, 'temp': 0.7530257998154177, 'alpha_loss': 4.801849442615843, 'alpha': 0.39092232435070284, 'critic_loss': 536.5670901292946, 'actor_loss': 78.6530945649621, 'time_step': 0.019921940669678805, 'td_error': 69.46491289073762, 'init_value': -138.33029174804688, 'ave_value': -68.23161275968359} step=13680
2022-04-20 15:30.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.20 [info     ] CQL_20220420152534: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00028400114405224895, 'time_algorithm_update': 0.019429172688757466, 'temp_loss': 0.005999256165055504, 'temp': 0.7529702756488532, 'alpha_loss': 4.626940399582623, 'alpha': 0.378984600828405, 'critic_loss': 544.0764945403874, 'actor_loss': 79.41016082317509, 'time_step': 0.019786845870882447, 'td_error': 69.17992068713886, 'init_value': -137.12750244140625, 'ave_value': -68.75853725790037} step=14022
2022-04-20 15:30.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.27 [info     ] CQL_20220420152534: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00028025337130005597, 'time_algorithm_update': 0.019378061880145157, 'temp_loss': 0.0046787631768755045, 'temp': 0.7530783285871584, 'alpha_loss': 4.492697169906215, 'alpha': 0.3674038822016521, 'critic_loss': 553.0680643717448, 'actor_loss': 80.16364980998792, 'time_step': 0.019730724786457262, 'td_error': 70.11511047094064, 'init_value': -140.2802734375, 'ave_value': -69.77706687895967} step=14364
2022-04-20 15:30.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.35 [info     ] CQL_20220420152534: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00028313042824728446, 'time_algorithm_update': 0.01953984004015114, 'temp_loss': 0.0048943951137756046, 'temp': 0.7527817102552158, 'alpha_loss': 4.3457813562705505, 'alpha': 0.35630758498844345, 'critic_loss': 560.1423980534425, 'actor_loss': 80.77936179177803, 'time_step': 0.019895881240130864, 'td_error': 71.08050151639996, 'init_value': -141.96414184570312, 'ave_value': -70.78115979569452} step=14706
2022-04-20 15:30.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.42 [info     ] CQL_20220420152534: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0002821265605458042, 'time_algorithm_update': 0.01947974810126232, 'temp_loss': -0.007082464083571706, 'temp': 0.7526899276421084, 'alpha_loss': 4.2121977227473115, 'alpha': 0.34547767461391915, 'critic_loss': 567.2013354385108, 'actor_loss': 81.3400650024414, 'time_step': 0.019833603100469936, 'td_error': 70.30761494579762, 'init_value': -142.7819061279297, 'ave_value': -70.92153495067143} step=15048
2022-04-20 15:30.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.49 [info     ] CQL_20220420152534: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00028684543587310967, 'time_algorithm_update': 0.021116483978360717, 'temp_loss': -0.0009811049305468971, 'temp': 0.7528041578524294, 'alpha_loss': 4.068895853750887, 'alpha': 0.33501200996644315, 'critic_loss': 572.603374458893, 'actor_loss': 81.86850760833562, 'time_step': 0.021477596104493617, 'td_error': 70.95304225350986, 'init_value': -143.7108612060547, 'ave_value': -72.23954239739491} step=15390
2022-04-20 15:30.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:30.57 [info     ] CQL_20220420152534: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00028192160422341867, 'time_algorithm_update': 0.021565946919179103, 'temp_loss': 0.014447115448831815, 'temp': 0.7524289201226151, 'alpha_loss': 3.946840632728666, 'alpha': 0.32491836587936557, 'critic_loss': 580.1801311649077, 'actor_loss': 82.58914432191013, 'time_step': 0.021920566670378748, 'td_error': 71.7835027571008, 'init_value': -143.51260375976562, 'ave_value': -71.76526451808107} step=15732
2022-04-20 15:30.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.05 [info     ] CQL_20220420152534: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00028424862532587774, 'time_algorithm_update': 0.021517576529965762, 'temp_loss': 0.006040708548579997, 'temp': 0.7522313636645936, 'alpha_loss': 3.812930495418303, 'alpha': 0.3150922081798141, 'critic_loss': 585.3902412102236, 'actor_loss': 82.94746178074887, 'time_step': 0.021873771795752454, 'td_error': 72.53752058540134, 'init_value': -144.0077667236328, 'ave_value': -71.68128294449669} step=16074
2022-04-20 15:31.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.13 [info     ] CQL_20220420152534: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0002898019656800387, 'time_algorithm_update': 0.021519393251653304, 'temp_loss': -0.004266071394739444, 'temp': 0.7518179881991002, 'alpha_loss': 3.702192144784314, 'alpha': 0.3056065793100156, 'critic_loss': 590.8498179117838, 'actor_loss': 83.48767734549897, 'time_step': 0.021882775931330454, 'td_error': 72.85370512672388, 'init_value': -146.8727264404297, 'ave_value': -73.92397977151043} step=16416
2022-04-20 15:31.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.21 [info     ] CQL_20220420152534: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0002802763766015482, 'time_algorithm_update': 0.021425261134989777, 'temp_loss': 0.021004231600363178, 'temp': 0.7519682542971003, 'alpha_loss': 3.5563970044342397, 'alpha': 0.2964205565856911, 'critic_loss': 595.4389803702371, 'actor_loss': 83.77931487769412, 'time_step': 0.02177739561649791, 'td_error': 72.3198782617446, 'init_value': -146.5990753173828, 'ave_value': -73.54109850035057} step=16758
2022-04-20 15:31.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:31.28 [info     ] CQL_20220420152534: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0002811652177955672, 'time_algorithm_update': 0.021383887145951477, 'temp_loss': 0.030518451095585934, 'temp': 0.7504687363292739, 'alpha_loss': 3.4370003701650607, 'alpha': 0.28753623889203656, 'critic_loss': 599.6273708231965, 'actor_loss': 84.28656251249258, 'time_step': 0.02173890704997102, 'td_error': 72.85884041231901, 'init_value': -147.4295196533203, 'ave_value': -74.1035629147412} step=17100
2022-04-20 15:31.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152534/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:31.29 [info     ] FQE_20220420153128: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00011589728206037039, 'time_algorithm_update': 0.0029746523822646543, 'loss': 0.00845171163329877, 'time_step': 0.00314443800822798, 'init_value': -0.07192904502153397, 'ave_value': 0.004621253848528943, 'soft_opc': nan} step=166




2022-04-20 15:31.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.30 [info     ] FQE_20220420153128: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00011841647596244353, 'time_algorithm_update': 0.0036169563431337655, 'loss': 0.006597153159385794, 'time_step': 0.0037927038698311313, 'init_value': -0.23399779200553894, 'ave_value': -0.09804365296747435, 'soft_opc': nan} step=332




2022-04-20 15:31.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.30 [info     ] FQE_20220420153128: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00011720140296292592, 'time_algorithm_update': 0.0035128090755049005, 'loss': 0.006101554850150034, 'time_step': 0.0036849042019212104, 'init_value': -0.31350892782211304, 'ave_value': -0.12984169653264455, 'soft_opc': nan} step=498




2022-04-20 15:31.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.31 [info     ] FQE_20220420153128: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012238054390413216, 'time_algorithm_update': 0.003427995256630771, 'loss': 0.006076704522774342, 'time_step': 0.003605607044265931, 'init_value': -0.4292280673980713, 'ave_value': -0.20014343050912628, 'soft_opc': nan} step=664




2022-04-20 15:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.32 [info     ] FQE_20220420153128: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001195065946464079, 'time_algorithm_update': 0.003277252955609057, 'loss': 0.005778722780622004, 'time_step': 0.0034528827092733727, 'init_value': -0.5110035538673401, 'ave_value': -0.23051844910542305, 'soft_opc': nan} step=830




2022-04-20 15:31.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.32 [info     ] FQE_20220420153128: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012260890868772943, 'time_algorithm_update': 0.003637216177331396, 'loss': 0.005548807063016547, 'time_step': 0.0038146642317254858, 'init_value': -0.5561954379081726, 'ave_value': -0.2433496821461967, 'soft_opc': nan} step=996




2022-04-20 15:31.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.33 [info     ] FQE_20220420153128: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001216207642153085, 'time_algorithm_update': 0.0035956839481032037, 'loss': 0.005306127408227648, 'time_step': 0.003773679216224027, 'init_value': -0.6119812726974487, 'ave_value': -0.25787602362164236, 'soft_opc': nan} step=1162




2022-04-20 15:31.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.34 [info     ] FQE_20220420153128: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001231015446674393, 'time_algorithm_update': 0.0030061911387615895, 'loss': 0.0049292615022936675, 'time_step': 0.003187073282448642, 'init_value': -0.6891440153121948, 'ave_value': -0.3275367494571853, 'soft_opc': nan} step=1328




2022-04-20 15:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.34 [info     ] FQE_20220420153128: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00012066852615540286, 'time_algorithm_update': 0.003538832607039486, 'loss': 0.004761647468107383, 'time_step': 0.003713665238345962, 'init_value': -0.7066217660903931, 'ave_value': -0.3211217395734747, 'soft_opc': nan} step=1494




2022-04-20 15:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.35 [info     ] FQE_20220420153128: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012186061905091067, 'time_algorithm_update': 0.0035989887743111118, 'loss': 0.004567677560486797, 'time_step': 0.003776794456573854, 'init_value': -0.7858233451843262, 'ave_value': -0.3628132249732074, 'soft_opc': nan} step=1660




2022-04-20 15:31.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.36 [info     ] FQE_20220420153128: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.000125107994998794, 'time_algorithm_update': 0.0035858355372785085, 'loss': 0.004351619831622994, 'time_step': 0.003770054104816483, 'init_value': -0.8887953758239746, 'ave_value': -0.4320655565381654, 'soft_opc': nan} step=1826




2022-04-20 15:31.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.36 [info     ] FQE_20220420153128: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00011861324310302734, 'time_algorithm_update': 0.003152942082968103, 'loss': 0.00417293633785974, 'time_step': 0.0033260224813438325, 'init_value': -0.8850659728050232, 'ave_value': -0.40065198979667715, 'soft_opc': nan} step=1992




2022-04-20 15:31.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.37 [info     ] FQE_20220420153128: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012283870972782732, 'time_algorithm_update': 0.0036920265979077443, 'loss': 0.004327378341653216, 'time_step': 0.00386808004723974, 'init_value': -0.9849611520767212, 'ave_value': -0.4689935069380177, 'soft_opc': nan} step=2158




2022-04-20 15:31.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.38 [info     ] FQE_20220420153128: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012121286736913475, 'time_algorithm_update': 0.0035902606435568936, 'loss': 0.004252907153813684, 'time_step': 0.003774736301008477, 'init_value': -1.090500831604004, 'ave_value': -0.550412886171929, 'soft_opc': nan} step=2324




2022-04-20 15:31.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.38 [info     ] FQE_20220420153128: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001203582947512707, 'time_algorithm_update': 0.003095850887068783, 'loss': 0.004252119887485562, 'time_step': 0.0032716199576136576, 'init_value': -1.1054718494415283, 'ave_value': -0.5459933227179824, 'soft_opc': nan} step=2490




2022-04-20 15:31.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.39 [info     ] FQE_20220420153128: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012297371783888484, 'time_algorithm_update': 0.00358184848923281, 'loss': 0.004414987087625651, 'time_step': 0.0037586487919451243, 'init_value': -1.1907987594604492, 'ave_value': -0.5895252176098995, 'soft_opc': nan} step=2656




2022-04-20 15:31.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.40 [info     ] FQE_20220420153128: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001245133848075407, 'time_algorithm_update': 0.003603553197470056, 'loss': 0.004642523344780083, 'time_step': 0.0037838967449693793, 'init_value': -1.2872751951217651, 'ave_value': -0.6366357563516578, 'soft_opc': nan} step=2822




2022-04-20 15:31.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.40 [info     ] FQE_20220420153128: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00011865345828504448, 'time_algorithm_update': 0.003473428358514625, 'loss': 0.004596973940483238, 'time_step': 0.003645404275641384, 'init_value': -1.3011163473129272, 'ave_value': -0.6379465135972242, 'soft_opc': nan} step=2988




2022-04-20 15:31.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.41 [info     ] FQE_20220420153128: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00011832168303340315, 'time_algorithm_update': 0.003186119608132236, 'loss': 0.0050838484992665875, 'time_step': 0.0033567569342004247, 'init_value': -1.3892521858215332, 'ave_value': -0.6605008792199262, 'soft_opc': nan} step=3154




2022-04-20 15:31.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.42 [info     ] FQE_20220420153128: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012092992483851421, 'time_algorithm_update': 0.0035280132868203773, 'loss': 0.005437569377848497, 'time_step': 0.0037038469889077797, 'init_value': -1.568964958190918, 'ave_value': -0.786211410248736, 'soft_opc': nan} step=3320




2022-04-20 15:31.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.42 [info     ] FQE_20220420153128: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00011913173169974822, 'time_algorithm_update': 0.0034584783645997563, 'loss': 0.0059132763038737225, 'time_step': 0.003631035965609263, 'init_value': -1.6227502822875977, 'ave_value': -0.8108078376431992, 'soft_opc': nan} step=3486




2022-04-20 15:31.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.43 [info     ] FQE_20220420153128: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00011710517377738494, 'time_algorithm_update': 0.003147294722407697, 'loss': 0.006686711615843542, 'time_step': 0.003317983753709908, 'init_value': -1.7481887340545654, 'ave_value': -0.8979476541884848, 'soft_opc': nan} step=3652




2022-04-20 15:31.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.44 [info     ] FQE_20220420153128: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012070156005491693, 'time_algorithm_update': 0.003632446369492864, 'loss': 0.007043845990128501, 'time_step': 0.003809882933834949, 'init_value': -1.800029993057251, 'ave_value': -0.9313241167849785, 'soft_opc': nan} step=3818




2022-04-20 15:31.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.44 [info     ] FQE_20220420153128: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012163799929331585, 'time_algorithm_update': 0.0035030267324792333, 'loss': 0.0072245732544506175, 'time_step': 0.0036783835974084325, 'init_value': -1.9062471389770508, 'ave_value': -0.9829102724253594, 'soft_opc': nan} step=3984




2022-04-20 15:31.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.45 [info     ] FQE_20220420153128: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012483510626367777, 'time_algorithm_update': 0.0036003402916781873, 'loss': 0.007960097391595951, 'time_step': 0.0037816806011889354, 'init_value': -2.010838031768799, 'ave_value': -1.0862888688238355, 'soft_opc': nan} step=4150




2022-04-20 15:31.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.46 [info     ] FQE_20220420153128: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00011800714285976915, 'time_algorithm_update': 0.002973662801535733, 'loss': 0.008503821984926486, 'time_step': 0.0031441047967198385, 'init_value': -2.1577115058898926, 'ave_value': -1.1700980978707471, 'soft_opc': nan} step=4316




2022-04-20 15:31.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.46 [info     ] FQE_20220420153128: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012546993163694818, 'time_algorithm_update': 0.0036641905106693864, 'loss': 0.009107592437102133, 'time_step': 0.0038465491260390684, 'init_value': -2.18389892578125, 'ave_value': -1.1805636566971336, 'soft_opc': nan} step=4482




2022-04-20 15:31.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.47 [info     ] FQE_20220420153128: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012308287333293133, 'time_algorithm_update': 0.0035949830549309053, 'loss': 0.009941097814589739, 'time_step': 0.0037739549774721444, 'init_value': -2.382781744003296, 'ave_value': -1.345622246047935, 'soft_opc': nan} step=4648




2022-04-20 15:31.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.48 [info     ] FQE_20220420153128: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012201860726597798, 'time_algorithm_update': 0.003266676362738552, 'loss': 0.010351134642975473, 'time_step': 0.003444200538727174, 'init_value': -2.473565101623535, 'ave_value': -1.3985678747027843, 'soft_opc': nan} step=4814




2022-04-20 15:31.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.48 [info     ] FQE_20220420153128: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012097301253353257, 'time_algorithm_update': 0.003585578447364899, 'loss': 0.011082830890755069, 'time_step': 0.0037625898797828032, 'init_value': -2.497828960418701, 'ave_value': -1.40893223730025, 'soft_opc': nan} step=4980




2022-04-20 15:31.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.49 [info     ] FQE_20220420153128: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012094859617302217, 'time_algorithm_update': 0.0035700596958757884, 'loss': 0.011613076859215522, 'time_step': 0.003748352269092238, 'init_value': -2.590625286102295, 'ave_value': -1.490395445128282, 'soft_opc': nan} step=5146




2022-04-20 15:31.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.50 [info     ] FQE_20220420153128: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012076331908444324, 'time_algorithm_update': 0.0035159760210887493, 'loss': 0.012541716808245618, 'time_step': 0.0036934915795383684, 'init_value': -2.755803346633911, 'ave_value': -1.6090662929120365, 'soft_opc': nan} step=5312




2022-04-20 15:31.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.50 [info     ] FQE_20220420153128: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00011345133723982845, 'time_algorithm_update': 0.002927276025335473, 'loss': 0.012563945207301617, 'time_step': 0.0030941647219370648, 'init_value': -2.7282943725585938, 'ave_value': -1.5792112354625454, 'soft_opc': nan} step=5478




2022-04-20 15:31.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.51 [info     ] FQE_20220420153128: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012455216373305722, 'time_algorithm_update': 0.0036919705839042203, 'loss': 0.01360878826214382, 'time_step': 0.0038696786007249213, 'init_value': -2.811730146408081, 'ave_value': -1.6299835316232734, 'soft_opc': nan} step=5644




2022-04-20 15:31.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.52 [info     ] FQE_20220420153128: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013512732034706208, 'time_algorithm_update': 0.0037290777068540275, 'loss': 0.014158270690668688, 'time_step': 0.003923562635858375, 'init_value': -2.840512990951538, 'ave_value': -1.6217671196874197, 'soft_opc': nan} step=5810




2022-04-20 15:31.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.52 [info     ] FQE_20220420153128: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012270082910376858, 'time_algorithm_update': 0.003375063459557223, 'loss': 0.015161432761345508, 'time_step': 0.0035518364733960256, 'init_value': -2.9192094802856445, 'ave_value': -1.6868852028438637, 'soft_opc': nan} step=5976




2022-04-20 15:31.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.53 [info     ] FQE_20220420153128: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00011767680386462843, 'time_algorithm_update': 0.0033793435039290464, 'loss': 0.01537684051962345, 'time_step': 0.0035503959081259117, 'init_value': -3.066431999206543, 'ave_value': -1.8059622580537924, 'soft_opc': nan} step=6142




2022-04-20 15:31.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.54 [info     ] FQE_20220420153128: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012006817093814712, 'time_algorithm_update': 0.0035412842968860305, 'loss': 0.016308592188491833, 'time_step': 0.003716435777135642, 'init_value': -3.1150565147399902, 'ave_value': -1.8114629399937552, 'soft_opc': nan} step=6308




2022-04-20 15:31.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.54 [info     ] FQE_20220420153128: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00012002508324312877, 'time_algorithm_update': 0.0036039510405207254, 'loss': 0.0171799433041723, 'time_step': 0.003780310412487352, 'init_value': -3.2713871002197266, 'ave_value': -1.9404106292101715, 'soft_opc': nan} step=6474




2022-04-20 15:31.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.55 [info     ] FQE_20220420153128: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00012091987104300994, 'time_algorithm_update': 0.0030435165726994894, 'loss': 0.01756045191285077, 'time_step': 0.003222649355968797, 'init_value': -3.3242475986480713, 'ave_value': -1.9475563090663772, 'soft_opc': nan} step=6640




2022-04-20 15:31.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.56 [info     ] FQE_20220420153128: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001258017068885895, 'time_algorithm_update': 0.0036729732191706278, 'loss': 0.01859892971031031, 'time_step': 0.003855031656931682, 'init_value': -3.4243407249450684, 'ave_value': -2.040349330813498, 'soft_opc': nan} step=6806




2022-04-20 15:31.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.56 [info     ] FQE_20220420153128: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00011821683630885848, 'time_algorithm_update': 0.0035169828368956783, 'loss': 0.019178298941577774, 'time_step': 0.0036907081144401826, 'init_value': -3.4901363849639893, 'ave_value': -2.039412506528803, 'soft_opc': nan} step=6972




2022-04-20 15:31.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.57 [info     ] FQE_20220420153128: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012255002217120435, 'time_algorithm_update': 0.003421428691909974, 'loss': 0.020049075004139758, 'time_step': 0.003597304045435894, 'init_value': -3.524160146713257, 'ave_value': -2.0874205077419408, 'soft_opc': nan} step=7138




2022-04-20 15:31.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.58 [info     ] FQE_20220420153128: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012108791305358151, 'time_algorithm_update': 0.0033080290599041677, 'loss': 0.020602049236606627, 'time_step': 0.003483150378767266, 'init_value': -3.753431797027588, 'ave_value': -2.259287312170407, 'soft_opc': nan} step=7304




2022-04-20 15:31.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.58 [info     ] FQE_20220420153128: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012264481510024472, 'time_algorithm_update': 0.0035547894167612835, 'loss': 0.021693986426079535, 'time_step': 0.0037374956062041133, 'init_value': -3.757322311401367, 'ave_value': -2.237252050920113, 'soft_opc': nan} step=7470




2022-04-20 15:31.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.59 [info     ] FQE_20220420153128: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012034680469926582, 'time_algorithm_update': 0.003533379141106663, 'loss': 0.021911200384205454, 'time_step': 0.0037105255816356243, 'init_value': -3.7627482414245605, 'ave_value': -2.2413744415517325, 'soft_opc': nan} step=7636




2022-04-20 15:31.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:31.59 [info     ] FQE_20220420153128: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001167733985257436, 'time_algorithm_update': 0.003158599497324013, 'loss': 0.022213046572939772, 'time_step': 0.0033288519066500375, 'init_value': -3.745932102203369, 'ave_value': -2.1890233096104486, 'soft_opc': nan} step=7802




2022-04-20 15:31.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.00 [info     ] FQE_20220420153128: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012088252837399402, 'time_algorithm_update': 0.0036196981567934335, 'loss': 0.023022439499828875, 'time_step': 0.003798602575279144, 'init_value': -3.8664708137512207, 'ave_value': -2.280593298845463, 'soft_opc': nan} step=7968




2022-04-20 15:32.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.01 [info     ] FQE_20220420153128: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001230053154818983, 'time_algorithm_update': 0.003613530871379806, 'loss': 0.02409563067576747, 'time_step': 0.0037899433848369553, 'init_value': -3.821120023727417, 'ave_value': -2.2213301011973674, 'soft_opc': nan} step=8134




2022-04-20 15:32.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:32.02 [info     ] FQE_20220420153128: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012184051145990211, 'time_algorithm_update': 0.003507098519658468, 'loss': 0.02490018113763128, 'time_step': 0.0036872668438647166, 'init_value': -3.9093222618103027, 'ave_value': -2.301977229373412, 'soft_opc': nan} step=8300




2022-04-20 15:32.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153128/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 15:32.02 [debug    ] RoundIterator is selected.
2022-04-20 15:32.02 [info     ] Directory is created at d3rlpy_logs/FQE_20220420153202
2022-04-20 15:32.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:32.02 [debug    ] Building models...
2022-04-20 15:32.02 [debug    ] Models have been built.
2022-04-20 15:32.02 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420153202/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:32.03 [info     ] FQE_20220420153202: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012543104415716126, 'time_algorithm_update': 0.003569607124772183, 'loss': 0.023309195863602812, 'time_step': 0.003754105678824491, 'init_value': -1.3307468891143799, 'ave_value': -1.3495639631071605, 'soft_opc': nan} step=344




2022-04-20 15:32.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.04 [info     ] FQE_20220420153202: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00012883543968200684, 'time_algorithm_update': 0.003387528103451396, 'loss': 0.02187071225963273, 'time_step': 0.003571858239728351, 'init_value': -2.182403564453125, 'ave_value': -2.2058532850833625, 'soft_opc': nan} step=688




2022-04-20 15:32.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.06 [info     ] FQE_20220420153202: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00012907524441563807, 'time_algorithm_update': 0.0035978916079499003, 'loss': 0.024904341558752537, 'time_step': 0.003785558218179747, 'init_value': -3.1536762714385986, 'ave_value': -3.2012950795996296, 'soft_opc': nan} step=1032




2022-04-20 15:32.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.07 [info     ] FQE_20220420153202: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001218402108480764, 'time_algorithm_update': 0.003290072430011838, 'loss': 0.027000810875189164, 'time_step': 0.0034679888292800548, 'init_value': -3.8504812717437744, 'ave_value': -3.941904924447472, 'soft_opc': nan} step=1376




2022-04-20 15:32.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.09 [info     ] FQE_20220420153202: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00012702442878900574, 'time_algorithm_update': 0.0035581401614255682, 'loss': 0.033088364373069515, 'time_step': 0.0037470640138138173, 'init_value': -4.8427228927612305, 'ave_value': -5.037720340946773, 'soft_opc': nan} step=1720




2022-04-20 15:32.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.10 [info     ] FQE_20220420153202: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00012473380842874216, 'time_algorithm_update': 0.003454879965893058, 'loss': 0.037590962821119574, 'time_step': 0.0036377456299094267, 'init_value': -5.323837757110596, 'ave_value': -5.589065331918699, 'soft_opc': nan} step=2064




2022-04-20 15:32.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.11 [info     ] FQE_20220420153202: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00012380647104839946, 'time_algorithm_update': 0.003297852222309556, 'loss': 0.04473372074087735, 'time_step': 0.003484830606815427, 'init_value': -6.274535655975342, 'ave_value': -6.63558360821492, 'soft_opc': nan} step=2408




2022-04-20 15:32.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.13 [info     ] FQE_20220420153202: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013006149336349134, 'time_algorithm_update': 0.0036987087061238844, 'loss': 0.05452500982886874, 'time_step': 0.0038878966209500336, 'init_value': -6.7204742431640625, 'ave_value': -7.163279570330371, 'soft_opc': nan} step=2752




2022-04-20 15:32.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.14 [info     ] FQE_20220420153202: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012875296348749206, 'time_algorithm_update': 0.003295903289040854, 'loss': 0.06375640743211734, 'time_step': 0.0034827888011932373, 'init_value': -7.256887435913086, 'ave_value': -7.84618440925538, 'soft_opc': nan} step=3096




2022-04-20 15:32.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.15 [info     ] FQE_20220420153202: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00013080308603685955, 'time_algorithm_update': 0.0036234335843906844, 'loss': 0.07324555107571054, 'time_step': 0.003814248844634655, 'init_value': -7.722574234008789, 'ave_value': -8.456964604059856, 'soft_opc': nan} step=3440




2022-04-20 15:32.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.17 [info     ] FQE_20220420153202: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00012592590132424997, 'time_algorithm_update': 0.0033805876277213875, 'loss': 0.08423392854219433, 'time_step': 0.0035667169925778413, 'init_value': -8.222314834594727, 'ave_value': -9.056657762355632, 'soft_opc': nan} step=3784




2022-04-20 15:32.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.18 [info     ] FQE_20220420153202: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00012800790542779968, 'time_algorithm_update': 0.0037872943767281466, 'loss': 0.09770087358747544, 'time_step': 0.0039742838504702544, 'init_value': -8.575399398803711, 'ave_value': -9.57631966573698, 'soft_opc': nan} step=4128




2022-04-20 15:32.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.20 [info     ] FQE_20220420153202: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001299020855925804, 'time_algorithm_update': 0.003481986217720564, 'loss': 0.10884078503532205, 'time_step': 0.003671105517897495, 'init_value': -9.046039581298828, 'ave_value': -10.232273289910308, 'soft_opc': nan} step=4472




2022-04-20 15:32.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.21 [info     ] FQE_20220420153202: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012339201084403105, 'time_algorithm_update': 0.0033123569433079207, 'loss': 0.12543638058948917, 'time_step': 0.003495212211165317, 'init_value': -9.738762855529785, 'ave_value': -11.043869235655208, 'soft_opc': nan} step=4816




2022-04-20 15:32.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.22 [info     ] FQE_20220420153202: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00012716096501017726, 'time_algorithm_update': 0.0035380125045776367, 'loss': 0.13795456914285328, 'time_step': 0.003722618485605994, 'init_value': -9.956735610961914, 'ave_value': -11.33474440043037, 'soft_opc': nan} step=5160




2022-04-20 15:32.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.24 [info     ] FQE_20220420153202: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001250754955203034, 'time_algorithm_update': 0.00328892053559769, 'loss': 0.15384463558168432, 'time_step': 0.003470890050710634, 'init_value': -10.514259338378906, 'ave_value': -12.03047835515426, 'soft_opc': nan} step=5504




2022-04-20 15:32.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.25 [info     ] FQE_20220420153202: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012927485066790913, 'time_algorithm_update': 0.003664410391519236, 'loss': 0.16789437103799956, 'time_step': 0.0038507164910782216, 'init_value': -10.682201385498047, 'ave_value': -12.277701830380671, 'soft_opc': nan} step=5848




2022-04-20 15:32.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.26 [info     ] FQE_20220420153202: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00012659194857575173, 'time_algorithm_update': 0.0032532644826312397, 'loss': 0.18185696712705893, 'time_step': 0.003436374109844829, 'init_value': -11.077859878540039, 'ave_value': -12.759619954899625, 'soft_opc': nan} step=6192




2022-04-20 15:32.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.28 [info     ] FQE_20220420153202: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001227696274602136, 'time_algorithm_update': 0.003568387308786082, 'loss': 0.20381075037415897, 'time_step': 0.0037471825300261032, 'init_value': -11.529923439025879, 'ave_value': -13.36867765037863, 'soft_opc': nan} step=6536




2022-04-20 15:32.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.29 [info     ] FQE_20220420153202: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013225023136582484, 'time_algorithm_update': 0.003493952196697856, 'loss': 0.21835294276811615, 'time_step': 0.0036854210288025614, 'init_value': -11.865744590759277, 'ave_value': -13.720453134259662, 'soft_opc': nan} step=6880




2022-04-20 15:32.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.30 [info     ] FQE_20220420153202: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00012552045112432434, 'time_algorithm_update': 0.0033190423666044724, 'loss': 0.2373362525425808, 'time_step': 0.003501214260278746, 'init_value': -12.429733276367188, 'ave_value': -14.29102209457406, 'soft_opc': nan} step=7224




2022-04-20 15:32.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.32 [info     ] FQE_20220420153202: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00012985495633857194, 'time_algorithm_update': 0.0036421903344087824, 'loss': 0.25948273739777505, 'time_step': 0.0038343071937561035, 'init_value': -12.846309661865234, 'ave_value': -14.822159583826323, 'soft_opc': nan} step=7568




2022-04-20 15:32.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.33 [info     ] FQE_20220420153202: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00012674026711042538, 'time_algorithm_update': 0.003254708162573881, 'loss': 0.2822538568871096, 'time_step': 0.0034405325734338096, 'init_value': -13.245304107666016, 'ave_value': -15.163935065215773, 'soft_opc': nan} step=7912




2022-04-20 15:32.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.34 [info     ] FQE_20220420153202: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012475806613301122, 'time_algorithm_update': 0.003507627997287484, 'loss': 0.2994645811768985, 'time_step': 0.003695266884426738, 'init_value': -13.826309204101562, 'ave_value': -15.810734973941837, 'soft_opc': nan} step=8256




2022-04-20 15:32.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.36 [info     ] FQE_20220420153202: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001282941463381745, 'time_algorithm_update': 0.00331264664960462, 'loss': 0.33273511125738625, 'time_step': 0.0034985112589459087, 'init_value': -14.003451347351074, 'ave_value': -16.049463085816786, 'soft_opc': nan} step=8600




2022-04-20 15:32.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.37 [info     ] FQE_20220420153202: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013021189112995946, 'time_algorithm_update': 0.0035648429116537403, 'loss': 0.35132412439710353, 'time_step': 0.0037567594716715258, 'init_value': -14.537723541259766, 'ave_value': -16.693795627301878, 'soft_opc': nan} step=8944




2022-04-20 15:32.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.38 [info     ] FQE_20220420153202: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00012781938841176588, 'time_algorithm_update': 0.003329556348711945, 'loss': 0.3845399008484471, 'time_step': 0.0035138227218805356, 'init_value': -14.99679183959961, 'ave_value': -17.166509040948508, 'soft_opc': nan} step=9288




2022-04-20 15:32.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.40 [info     ] FQE_20220420153202: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00012853117876274642, 'time_algorithm_update': 0.0034474889899409095, 'loss': 0.41704593440192905, 'time_step': 0.0036361515522003174, 'init_value': -15.283171653747559, 'ave_value': -17.49512645306888, 'soft_opc': nan} step=9632




2022-04-20 15:32.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.41 [info     ] FQE_20220420153202: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013012179108553155, 'time_algorithm_update': 0.0035811891389447587, 'loss': 0.4415152472346415, 'time_step': 0.003772915795791981, 'init_value': -15.592110633850098, 'ave_value': -17.870601451665433, 'soft_opc': nan} step=9976




2022-04-20 15:32.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.43 [info     ] FQE_20220420153202: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00013007188952246377, 'time_algorithm_update': 0.0033969719742619714, 'loss': 0.45504993550089556, 'time_step': 0.003584853438443916, 'init_value': -15.758525848388672, 'ave_value': -18.078115232785542, 'soft_opc': nan} step=10320




2022-04-20 15:32.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.44 [info     ] FQE_20220420153202: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00013125427933626397, 'time_algorithm_update': 0.0036600842032321664, 'loss': 0.4827281973963647, 'time_step': 0.003858037466226622, 'init_value': -16.15427017211914, 'ave_value': -18.442470943712973, 'soft_opc': nan} step=10664




2022-04-20 15:32.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.45 [info     ] FQE_20220420153202: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00012709858805634255, 'time_algorithm_update': 0.0033642351627349854, 'loss': 0.49688081584091104, 'time_step': 0.0035538645677788312, 'init_value': -16.450611114501953, 'ave_value': -18.911916696864207, 'soft_opc': nan} step=11008




2022-04-20 15:32.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.47 [info     ] FQE_20220420153202: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00012939891149831373, 'time_algorithm_update': 0.0035499244235282722, 'loss': 0.5239092680777229, 'time_step': 0.0037398095740828405, 'init_value': -16.7503604888916, 'ave_value': -19.361523759848364, 'soft_opc': nan} step=11352




2022-04-20 15:32.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.48 [info     ] FQE_20220420153202: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013132774552633596, 'time_algorithm_update': 0.00347141471020011, 'loss': 0.5461119396373803, 'time_step': 0.0036612963953683545, 'init_value': -17.06509780883789, 'ave_value': -19.764776895550995, 'soft_opc': nan} step=11696




2022-04-20 15:32.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.49 [info     ] FQE_20220420153202: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00012801414312318314, 'time_algorithm_update': 0.0034294488818146463, 'loss': 0.5692695970051423, 'time_step': 0.003614635661590931, 'init_value': -17.460447311401367, 'ave_value': -20.378541516076336, 'soft_opc': nan} step=12040




2022-04-20 15:32.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.51 [info     ] FQE_20220420153202: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00012947792230650436, 'time_algorithm_update': 0.0035548549751902737, 'loss': 0.5812303258950801, 'time_step': 0.0037445294302563335, 'init_value': -17.71767807006836, 'ave_value': -20.70773195484737, 'soft_opc': nan} step=12384




2022-04-20 15:32.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.52 [info     ] FQE_20220420153202: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013208874436311944, 'time_algorithm_update': 0.0034418916979501416, 'loss': 0.6069296947463827, 'time_step': 0.0036336301371108653, 'init_value': -18.030935287475586, 'ave_value': -21.085698258447216, 'soft_opc': nan} step=12728




2022-04-20 15:32.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.54 [info     ] FQE_20220420153202: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00012922702833663587, 'time_algorithm_update': 0.003606202990509743, 'loss': 0.6267954703138838, 'time_step': 0.0037992735241734704, 'init_value': -18.06890296936035, 'ave_value': -21.252092421914007, 'soft_opc': nan} step=13072




2022-04-20 15:32.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.55 [info     ] FQE_20220420153202: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00012827266094296477, 'time_algorithm_update': 0.00282789385595987, 'loss': 0.6428578793190333, 'time_step': 0.0030135124228721443, 'init_value': -17.996755599975586, 'ave_value': -21.2368518442721, 'soft_opc': nan} step=13416




2022-04-20 15:32.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.56 [info     ] FQE_20220420153202: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00012463885684346044, 'time_algorithm_update': 0.0030845424463582594, 'loss': 0.6551407126244158, 'time_step': 0.0032688143641449686, 'init_value': -18.156206130981445, 'ave_value': -21.622559676030736, 'soft_opc': nan} step=13760




2022-04-20 15:32.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.57 [info     ] FQE_20220420153202: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00012994297715120538, 'time_algorithm_update': 0.0030619550582974457, 'loss': 0.678092201608558, 'time_step': 0.0032503611819688664, 'init_value': -18.2945499420166, 'ave_value': -21.806154994626304, 'soft_opc': nan} step=14104




2022-04-20 15:32.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:32.58 [info     ] FQE_20220420153202: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00012762948524120242, 'time_algorithm_update': 0.003082002318182657, 'loss': 0.6861804658547044, 'time_step': 0.003271807071774505, 'init_value': -18.371252059936523, 'ave_value': -21.945650589922526, 'soft_opc': nan} step=14448




2022-04-20 15:32.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.00 [info     ] FQE_20220420153202: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00012601461521414824, 'time_algorithm_update': 0.003036196148672769, 'loss': 0.6925027173792206, 'time_step': 0.003221574910851412, 'init_value': -17.91817855834961, 'ave_value': -21.74747743611937, 'soft_opc': nan} step=14792




2022-04-20 15:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.01 [info     ] FQE_20220420153202: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001202551431434099, 'time_algorithm_update': 0.002952233303424924, 'loss': 0.6833099110030331, 'time_step': 0.0031294129615606265, 'init_value': -18.040727615356445, 'ave_value': -22.11867910658454, 'soft_opc': nan} step=15136




2022-04-20 15:33.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.02 [info     ] FQE_20220420153202: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00012793929077858148, 'time_algorithm_update': 0.003072695676670518, 'loss': 0.7090226835029763, 'time_step': 0.0032591417778369994, 'init_value': -17.983745574951172, 'ave_value': -22.034677000572014, 'soft_opc': nan} step=15480




2022-04-20 15:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.03 [info     ] FQE_20220420153202: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00012431726899257925, 'time_algorithm_update': 0.0030388437038244205, 'loss': 0.7148948123321197, 'time_step': 0.003221069657525351, 'init_value': -17.848310470581055, 'ave_value': -22.186486409000448, 'soft_opc': nan} step=15824




2022-04-20 15:33.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.04 [info     ] FQE_20220420153202: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00012909118519272914, 'time_algorithm_update': 0.003150609343550926, 'loss': 0.713542343501738, 'time_step': 0.0033443979052610174, 'init_value': -17.31300163269043, 'ave_value': -21.78233318601374, 'soft_opc': nan} step=16168




2022-04-20 15:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.06 [info     ] FQE_20220420153202: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00012943910997967388, 'time_algorithm_update': 0.003392130136489868, 'loss': 0.739173344937478, 'time_step': 0.0035827748997266902, 'init_value': -17.71878433227539, 'ave_value': -22.409161753334978, 'soft_opc': nan} step=16512




2022-04-20 15:33.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.07 [info     ] FQE_20220420153202: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00012894286665805551, 'time_algorithm_update': 0.0033378919889760572, 'loss': 0.7461643609933035, 'time_step': 0.0035258954347566116, 'init_value': -17.412336349487305, 'ave_value': -22.41756876437245, 'soft_opc': nan} step=16856




2022-04-20 15:33.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:33.08 [info     ] FQE_20220420153202: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00013106992078381916, 'time_algorithm_update': 0.0033381719921910485, 'loss': 0.7419047217198842, 'time_step': 0.0035273904024168503, 'init_value': -17.273632049560547, 'ave_value': -22.392122378300975, 'soft_opc': nan} step=17200




2022-04-20 15:33.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420153202/model_17200.pt
most optimal hyper params for cql at this point:  [0.0006716406409940611, 0.002206817187940385, 3.919949661777032e-05, 3]
search iteration:  1
using hyper params:  [0.0014647953939590536, 0.0011235408329594708, 6.975386681700787e-05, 5]
2022-04-20 15:33.08 [debug    ] RoundIterator is selected.
2022-04-20 15:33.08 [info     ] Directory is created at d3rlpy_logs/CQL_20220420153308
2022-04-20 15:33.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:33.08 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:33.08 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420153308/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.16 [info     ] CQL_20220420153308: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030620753416541027, 'time_algorithm_update': 0.020161056379128618, 'temp_loss': 4.585174765503197, 'temp': 0.9884812551283697, 'alpha_loss': -18.521946120680425, 'alpha': 1.017855851970918, 'critic_loss': 66.06062593515854, 'actor_loss': 2.9633969745941853, 'time_step': 0.020541785056130926, 'td_error': 4.468933571033159, 'init_value': -5.462484836578369, 'ave_value': -4.487073856424708} step=342
2022-04-20 15:33.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.23 [info     ] CQL_20220420153308: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003162657308299639, 'time_algorithm_update': 0.020576587894506622, 'temp_loss': 4.273830592283729, 'temp': 0.9662283779236308, 'alpha_loss': -15.06738547553793, 'alpha': 1.051458842224545, 'critic_loss': 44.75827110022829, 'actor_loss': 5.992472679294341, 'time_step': 0.020969288390979432, 'td_error': 3.5786955010142663, 'init_value': -10.532341957092285, 'ave_value': -7.83848995558347} step=684
2022-04-20 15:33.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.31 [info     ] CQL_20220420153308: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003193024306269417, 'time_algorithm_update': 0.020260551519561232, 'temp_loss': 4.047400654407969, 'temp': 0.9446407370051445, 'alpha_loss': -11.462275432564361, 'alpha': 1.080624084375058, 'critic_loss': 41.929138529370405, 'actor_loss': 11.027454482184517, 'time_step': 0.020658204430028013, 'td_error': 4.493205063620096, 'init_value': -19.993417739868164, 'ave_value': -13.297956488105628} step=1026
2022-04-20 15:33.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.39 [info     ] CQL_20220420153308: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003355281394824647, 'time_algorithm_update': 0.02176634052343536, 'temp_loss': 3.7319729669749386, 'temp': 0.9240884115124306, 'alpha_loss': -8.830235458954036, 'alpha': 1.1065039937956291, 'critic_loss': 42.86648483722531, 'actor_loss': 16.51691269735147, 'time_step': 0.022188172702900848, 'td_error': 6.171533809868586, 'init_value': -27.600749969482422, 'ave_value': -17.353389787162815} step=1368
2022-04-20 15:33.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.47 [info     ] CQL_20220420153308: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003659411480552272, 'time_algorithm_update': 0.024211582384611432, 'temp_loss': 3.4232894314659967, 'temp': 0.9047913797069014, 'alpha_loss': -7.494959018383807, 'alpha': 1.1311591605693974, 'critic_loss': 46.501854745965254, 'actor_loss': 21.47476131595366, 'time_step': 0.024671639615332173, 'td_error': 8.819723721118788, 'init_value': -35.73812484741211, 'ave_value': -22.17761421282046} step=1710
2022-04-20 15:33.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:33.56 [info     ] CQL_20220420153308: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00036501396469205445, 'time_algorithm_update': 0.023772908233062565, 'temp_loss': 3.1591368704511407, 'temp': 0.8864639085984369, 'alpha_loss': -6.562916359706231, 'alpha': 1.1560634144565516, 'critic_loss': 51.76342365197968, 'actor_loss': 26.100248782955415, 'time_step': 0.02423165834438034, 'td_error': 11.576859126277766, 'init_value': -42.379173278808594, 'ave_value': -25.58864751565027} step=2052
2022-04-20 15:33.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.05 [info     ] CQL_20220420153308: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003692218434740925, 'time_algorithm_update': 0.023855799819990905, 'temp_loss': 2.9519928895939165, 'temp': 0.8688132184290747, 'alpha_loss': -5.759647192313657, 'alpha': 1.1812164326857406, 'critic_loss': 58.19850594816152, 'actor_loss': 30.388301972060177, 'time_step': 0.024319915743599162, 'td_error': 14.546414089861734, 'init_value': -48.581756591796875, 'ave_value': -29.305014365269457} step=2394
2022-04-20 15:34.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.14 [info     ] CQL_20220420153308: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003618559642144811, 'time_algorithm_update': 0.023892113339831256, 'temp_loss': 2.741763704701474, 'temp': 0.851709526881837, 'alpha_loss': -5.02242774851838, 'alpha': 1.2065366259095265, 'critic_loss': 64.94096693518566, 'actor_loss': 34.256115350109795, 'time_step': 0.02434686052868938, 'td_error': 18.011242747682065, 'init_value': -54.817909240722656, 'ave_value': -33.068006993796644} step=2736
2022-04-20 15:34.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.22 [info     ] CQL_20220420153308: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00036634478652686404, 'time_algorithm_update': 0.023919265172634905, 'temp_loss': 2.5598508224152683, 'temp': 0.8351490781669728, 'alpha_loss': -4.385460610975299, 'alpha': 1.231765513880211, 'critic_loss': 72.13136902747796, 'actor_loss': 37.70862728252745, 'time_step': 0.02437999513414171, 'td_error': 20.741376944211943, 'init_value': -58.85248565673828, 'ave_value': -34.96365279645063} step=3078
2022-04-20 15:34.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.31 [info     ] CQL_20220420153308: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00036159314607319077, 'time_algorithm_update': 0.02385849910869933, 'temp_loss': 2.408091940726453, 'temp': 0.8190250760979123, 'alpha_loss': -3.8226660375009502, 'alpha': 1.2570707710862856, 'critic_loss': 79.72567295052154, 'actor_loss': 40.948076660870115, 'time_step': 0.02431602366486488, 'td_error': 24.22388972199476, 'init_value': -63.64838409423828, 'ave_value': -38.21629982433743} step=3420
2022-04-20 15:34.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.40 [info     ] CQL_20220420153308: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00036880217100444595, 'time_algorithm_update': 0.023834543618542408, 'temp_loss': 2.2469631172063056, 'temp': 0.8032854889568529, 'alpha_loss': -3.245746804608239, 'alpha': 1.281859883090906, 'critic_loss': 86.66834829006976, 'actor_loss': 43.80260440759491, 'time_step': 0.024298020970751667, 'td_error': 27.399767541695713, 'init_value': -67.96953582763672, 'ave_value': -40.65750949520853} step=3762
2022-04-20 15:34.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.48 [info     ] CQL_20220420153308: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000362256814164725, 'time_algorithm_update': 0.023745849118595234, 'temp_loss': 2.117416756892065, 'temp': 0.7878963494161416, 'alpha_loss': -2.6974785498708314, 'alpha': 1.3061630331981948, 'critic_loss': 93.82672911080701, 'actor_loss': 46.4236612152635, 'time_step': 0.024200569119369776, 'td_error': 30.5897478830304, 'init_value': -71.89986419677734, 'ave_value': -42.759087894846864} step=4104
2022-04-20 15:34.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:34.57 [info     ] CQL_20220420153308: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035796946252298636, 'time_algorithm_update': 0.023553143467819483, 'temp_loss': 1.971185648999019, 'temp': 0.7728625908929702, 'alpha_loss': -2.1513287574161737, 'alpha': 1.3286857256415294, 'critic_loss': 101.22623778226082, 'actor_loss': 48.82079638654029, 'time_step': 0.024002468376828914, 'td_error': 33.33205482212288, 'init_value': -75.28781127929688, 'ave_value': -44.782090961188075} step=4446
2022-04-20 15:34.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.06 [info     ] CQL_20220420153308: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003600971043458459, 'time_algorithm_update': 0.023795441577309055, 'temp_loss': 1.843578170614633, 'temp': 0.758261730970695, 'alpha_loss': -1.619493363336037, 'alpha': 1.3489588077305352, 'critic_loss': 107.47660829867536, 'actor_loss': 50.96846159996345, 'time_step': 0.02424605617746275, 'td_error': 36.07716233877209, 'init_value': -78.60148620605469, 'ave_value': -46.88398815457751} step=4788
2022-04-20 15:35.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.15 [info     ] CQL_20220420153308: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003616984127557766, 'time_algorithm_update': 0.0243642622964424, 'temp_loss': 1.7172418302959866, 'temp': 0.7440275304150163, 'alpha_loss': -1.0796553836829077, 'alpha': 1.365710607397626, 'critic_loss': 114.41049912659048, 'actor_loss': 52.94263134783471, 'time_step': 0.024817128627620942, 'td_error': 38.63128331666806, 'init_value': -81.35469055175781, 'ave_value': -48.55474410503364} step=5130
2022-04-20 15:35.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.24 [info     ] CQL_20220420153308: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003721748876292803, 'time_algorithm_update': 0.025482398027565047, 'temp_loss': 1.5911588728079322, 'temp': 0.7301544564509252, 'alpha_loss': -0.5821670206741577, 'alpha': 1.3779514501666466, 'critic_loss': 120.65652659901401, 'actor_loss': 54.70252531174331, 'time_step': 0.025947552675392196, 'td_error': 40.909961256880095, 'init_value': -83.46459197998047, 'ave_value': -50.0876416984422} step=5472
2022-04-20 15:35.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.33 [info     ] CQL_20220420153308: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003684598800034551, 'time_algorithm_update': 0.025854187402111744, 'temp_loss': 1.4756674201864945, 'temp': 0.716688345050254, 'alpha_loss': -0.12684187446503045, 'alpha': 1.3842738549611722, 'critic_loss': 127.70562411748875, 'actor_loss': 56.31156509800961, 'time_step': 0.026319851652223464, 'td_error': 43.078198990726705, 'init_value': -86.35186767578125, 'ave_value': -51.79674797908948} step=5814
2022-04-20 15:35.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.43 [info     ] CQL_20220420153308: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00036936196667409083, 'time_algorithm_update': 0.025581748164885225, 'temp_loss': 1.375344228674794, 'temp': 0.703564954605716, 'alpha_loss': 0.32235551906793175, 'alpha': 1.3819025772356848, 'critic_loss': 133.66095416866548, 'actor_loss': 57.681179135863545, 'time_step': 0.026047630616795946, 'td_error': 44.52141698916849, 'init_value': -87.21552276611328, 'ave_value': -52.23977875568308} step=6156
2022-04-20 15:35.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:35.52 [info     ] CQL_20220420153308: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003687631317049439, 'time_algorithm_update': 0.025826836887158845, 'temp_loss': 1.2606193332644233, 'temp': 0.6907844121693171, 'alpha_loss': 0.7035379734154783, 'alpha': 1.3707482832217077, 'critic_loss': 139.8440334253144, 'actor_loss': 58.89472002174422, 'time_step': 0.02629150633226361, 'td_error': 46.21231581467077, 'init_value': -89.07756042480469, 'ave_value': -53.67860495616187} step=6498
2022-04-20 15:35.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.01 [info     ] CQL_20220420153308: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003606485344513118, 'time_algorithm_update': 0.02534268125455979, 'temp_loss': 1.1788434607592242, 'temp': 0.6783141577452944, 'alpha_loss': 1.0362233999743513, 'alpha': 1.349533886588805, 'critic_loss': 146.44957427532353, 'actor_loss': 60.04531679655376, 'time_step': 0.02579595122420997, 'td_error': 47.04637077423638, 'init_value': -90.0620346069336, 'ave_value': -54.33398354989854} step=6840
2022-04-20 15:36.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.11 [info     ] CQL_20220420153308: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003669694153188962, 'time_algorithm_update': 0.025726779162535192, 'temp_loss': 1.093583563266442, 'temp': 0.6660980728634617, 'alpha_loss': 1.3254085556353437, 'alpha': 1.3199705833580062, 'critic_loss': 152.6897887960512, 'actor_loss': 61.038091949552125, 'time_step': 0.02619090693735937, 'td_error': 47.71135686478949, 'init_value': -90.444580078125, 'ave_value': -54.70905502679783} step=7182
2022-04-20 15:36.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.20 [info     ] CQL_20220420153308: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003652091611895645, 'time_algorithm_update': 0.025453340240389283, 'temp_loss': 1.018401247541807, 'temp': 0.6540690033059371, 'alpha_loss': 1.54383392086081, 'alpha': 1.2851313864975644, 'critic_loss': 159.65291468302408, 'actor_loss': 61.899603157712704, 'time_step': 0.0259124319455777, 'td_error': 49.44545869145429, 'init_value': -92.74087524414062, 'ave_value': -56.405896082957454} step=7524
2022-04-20 15:36.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.29 [info     ] CQL_20220420153308: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003678199143437614, 'time_algorithm_update': 0.0257648243541606, 'temp_loss': 0.9451316516650351, 'temp': 0.6423103415478043, 'alpha_loss': 1.7549100370794448, 'alpha': 1.2456512677739238, 'critic_loss': 165.90420558996368, 'actor_loss': 62.70656035796941, 'time_step': 0.026228661425629553, 'td_error': 50.42260577805011, 'init_value': -93.19392395019531, 'ave_value': -56.56922036967165} step=7866
2022-04-20 15:36.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.39 [info     ] CQL_20220420153308: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003729082687556395, 'time_algorithm_update': 0.025402372343498365, 'temp_loss': 0.8668632559608995, 'temp': 0.630882943408531, 'alpha_loss': 1.9047688300781258, 'alpha': 1.2053256951577482, 'critic_loss': 172.1981297766256, 'actor_loss': 63.42002414681061, 'time_step': 0.025869066952264798, 'td_error': 50.68130775373492, 'init_value': -93.3930892944336, 'ave_value': -56.82433292317229} step=8208
2022-04-20 15:36.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.48 [info     ] CQL_20220420153308: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00036700775748804996, 'time_algorithm_update': 0.02589616859168337, 'temp_loss': 0.8152633954209891, 'temp': 0.6196622187979737, 'alpha_loss': 2.03133729614361, 'alpha': 1.165075819394742, 'critic_loss': 179.47345813951995, 'actor_loss': 64.16645012124937, 'time_step': 0.0263575259705036, 'td_error': 51.216412460387694, 'init_value': -93.3297348022461, 'ave_value': -57.05353409684859} step=8550
2022-04-20 15:36.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:36.58 [info     ] CQL_20220420153308: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00037095421238949423, 'time_algorithm_update': 0.025515529844495986, 'temp_loss': 0.7485759378525249, 'temp': 0.608605837787104, 'alpha_loss': 2.1114997879618356, 'alpha': 1.126196223044256, 'critic_loss': 185.84397964031376, 'actor_loss': 64.66956096225314, 'time_step': 0.025981583790472378, 'td_error': 52.0487649288577, 'init_value': -94.80690002441406, 'ave_value': -58.0592870434273} step=8892
2022-04-20 15:36.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.07 [info     ] CQL_20220420153308: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003707855068452177, 'time_algorithm_update': 0.02592682071596558, 'temp_loss': 0.6908318581636886, 'temp': 0.5979180383054834, 'alpha_loss': 2.188047558722789, 'alpha': 1.0879206451756216, 'critic_loss': 193.0761640671401, 'actor_loss': 65.26142813030042, 'time_step': 0.026394885882996676, 'td_error': 52.21947027066658, 'init_value': -94.95133209228516, 'ave_value': -58.25555070419166} step=9234
2022-04-20 15:37.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.16 [info     ] CQL_20220420153308: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003611783535159819, 'time_algorithm_update': 0.0254892487274973, 'temp_loss': 0.640844702851354, 'temp': 0.5874061793611761, 'alpha_loss': 2.279118821623991, 'alpha': 1.0510127488632648, 'critic_loss': 199.06497112073396, 'actor_loss': 65.69478446558902, 'time_step': 0.02594224402779027, 'td_error': 52.709692046582184, 'init_value': -95.77781677246094, 'ave_value': -58.956480456307396} step=9576
2022-04-20 15:37.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.26 [info     ] CQL_20220420153308: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003717008389924702, 'time_algorithm_update': 0.025704965256808095, 'temp_loss': 0.6006746503867602, 'temp': 0.5770370357217844, 'alpha_loss': 2.301653901714156, 'alpha': 1.0159272368539844, 'critic_loss': 204.4952790109735, 'actor_loss': 66.17824628060325, 'time_step': 0.026175402061283937, 'td_error': 53.207193120615784, 'init_value': -96.7098159790039, 'ave_value': -59.86438397999685} step=9918
2022-04-20 15:37.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.35 [info     ] CQL_20220420153308: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003702668418661196, 'time_algorithm_update': 0.025510965034975644, 'temp_loss': 0.5423310162902576, 'temp': 0.5669350087294105, 'alpha_loss': 2.3423783900270685, 'alpha': 0.9822558811184956, 'critic_loss': 209.74765402810615, 'actor_loss': 66.58633352580823, 'time_step': 0.025976915805660493, 'td_error': 53.04698842217054, 'init_value': -96.47144317626953, 'ave_value': -60.03570758149565} step=10260
2022-04-20 15:37.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.44 [info     ] CQL_20220420153308: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003646179946542483, 'time_algorithm_update': 0.025748839155275222, 'temp_loss': 0.5099218810815909, 'temp': 0.5572089901444508, 'alpha_loss': 2.3649011072185306, 'alpha': 0.9496492921260365, 'critic_loss': 214.85233485350136, 'actor_loss': 66.95057258829038, 'time_step': 0.02620860847116214, 'td_error': 53.62553824990906, 'init_value': -96.75718688964844, 'ave_value': -60.19521151366199} step=10602
2022-04-20 15:37.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:37.54 [info     ] CQL_20220420153308: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003686195228532044, 'time_algorithm_update': 0.02561951101871959, 'temp_loss': 0.4750631045092616, 'temp': 0.5473602964864139, 'alpha_loss': 2.380781098383299, 'alpha': 0.9186731981255157, 'critic_loss': 219.46953779075577, 'actor_loss': 67.27284763849269, 'time_step': 0.02608080495867813, 'td_error': 53.22841935844598, 'init_value': -96.30406188964844, 'ave_value': -60.11849459241774} step=10944
2022-04-20 15:37.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.03 [info     ] CQL_20220420153308: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003697356285407529, 'time_algorithm_update': 0.025746926229599624, 'temp_loss': 0.43841782791746986, 'temp': 0.5378448414175134, 'alpha_loss': 2.367736736454113, 'alpha': 0.8886158335627171, 'critic_loss': 224.69682213855765, 'actor_loss': 67.60100004408095, 'time_step': 0.026213999380145157, 'td_error': 53.08375838289769, 'init_value': -96.8479232788086, 'ave_value': -60.94514816660401} step=11286
2022-04-20 15:38.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.12 [info     ] CQL_20220420153308: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003513913405568976, 'time_algorithm_update': 0.024696608733015452, 'temp_loss': 0.39815339988522364, 'temp': 0.5287005758424949, 'alpha_loss': 2.376483677859195, 'alpha': 0.8596276672262895, 'critic_loss': 229.09982237341808, 'actor_loss': 67.8669469956069, 'time_step': 0.025134573205869797, 'td_error': 53.37054885861815, 'init_value': -97.12384033203125, 'ave_value': -61.52191188806215} step=11628
2022-04-20 15:38.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.21 [info     ] CQL_20220420153308: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035533012702451114, 'time_algorithm_update': 0.02439247307024504, 'temp_loss': 0.35420453108367866, 'temp': 0.5198702747710267, 'alpha_loss': 2.3819450351752733, 'alpha': 0.8319423819145961, 'critic_loss': 233.10381540220382, 'actor_loss': 68.1110759422793, 'time_step': 0.024841210995501246, 'td_error': 53.25986804182959, 'init_value': -97.17053985595703, 'ave_value': -61.75630022136343} step=11970
2022-04-20 15:38.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.30 [info     ] CQL_20220420153308: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003653402216950355, 'time_algorithm_update': 0.02420165176280061, 'temp_loss': 0.319118596232164, 'temp': 0.5114718917866199, 'alpha_loss': 2.3570076234159414, 'alpha': 0.8054218299207632, 'critic_loss': 238.07619186312135, 'actor_loss': 68.27711186771505, 'time_step': 0.02466258598349945, 'td_error': 53.65144473916336, 'init_value': -97.71595764160156, 'ave_value': -62.05659225920299} step=12312
2022-04-20 15:38.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.39 [info     ] CQL_20220420153308: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00036402613098858395, 'time_algorithm_update': 0.024074470090587236, 'temp_loss': 0.30675764118283116, 'temp': 0.5031490585608789, 'alpha_loss': 2.321601465344429, 'alpha': 0.7796896717004609, 'critic_loss': 241.25180678339729, 'actor_loss': 68.56136246173702, 'time_step': 0.024531853129292094, 'td_error': 52.38838717745314, 'init_value': -96.46007537841797, 'ave_value': -61.760567847274174} step=12654
2022-04-20 15:38.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.48 [info     ] CQL_20220420153308: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00036467934212489436, 'time_algorithm_update': 0.02420862236915276, 'temp_loss': 0.2780054602336291, 'temp': 0.49482723364704534, 'alpha_loss': 2.319972480816102, 'alpha': 0.7552158001222109, 'critic_loss': 244.91738030506156, 'actor_loss': 68.74578472605923, 'time_step': 0.02466590850673921, 'td_error': 52.94953385643344, 'init_value': -96.98172760009766, 'ave_value': -61.56762287945092} step=12996
2022-04-20 15:38.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:38.56 [info     ] CQL_20220420153308: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003675996211537144, 'time_algorithm_update': 0.023977782991197374, 'temp_loss': 0.24553135515882346, 'temp': 0.48681219863264186, 'alpha_loss': 2.28734099681963, 'alpha': 0.7315130061224887, 'critic_loss': 247.3967644763969, 'actor_loss': 68.8847329212211, 'time_step': 0.02444177064282155, 'td_error': 52.73354209312754, 'init_value': -96.79974365234375, 'ave_value': -61.92961684023475} step=13338
2022-04-20 15:38.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.05 [info     ] CQL_20220420153308: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003676372661925199, 'time_algorithm_update': 0.023853217649180986, 'temp_loss': 0.2231789364626533, 'temp': 0.479523334151123, 'alpha_loss': 2.2465603890126213, 'alpha': 0.7082326260575077, 'critic_loss': 249.3238525390625, 'actor_loss': 69.05093110513965, 'time_step': 0.024314247376737538, 'td_error': 51.78016377658709, 'init_value': -97.0081558227539, 'ave_value': -62.849611668192054} step=13680
2022-04-20 15:39.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.14 [info     ] CQL_20220420153308: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00036813571439151877, 'time_algorithm_update': 0.02365817103469581, 'temp_loss': 0.21157156587006493, 'temp': 0.47185167095117403, 'alpha_loss': 2.21704678359436, 'alpha': 0.6864166285908013, 'critic_loss': 251.9068841766893, 'actor_loss': 69.21502678854424, 'time_step': 0.02412015722509016, 'td_error': 52.13990099286652, 'init_value': -96.66828918457031, 'ave_value': -62.13474462452101} step=14022
2022-04-20 15:39.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.22 [info     ] CQL_20220420153308: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.000371970628437243, 'time_algorithm_update': 0.023999331987392136, 'temp_loss': 0.18002413817795745, 'temp': 0.46454882804761854, 'alpha_loss': 2.1643250999449375, 'alpha': 0.6654318694831335, 'critic_loss': 252.8667226423297, 'actor_loss': 69.34186906424182, 'time_step': 0.024461725999040212, 'td_error': 51.187550838114504, 'init_value': -96.2844467163086, 'ave_value': -62.29953501738944} step=14364
2022-04-20 15:39.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.31 [info     ] CQL_20220420153308: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000367021700095015, 'time_algorithm_update': 0.02403119432995891, 'temp_loss': 0.15934638216089436, 'temp': 0.4580581962016591, 'alpha_loss': 2.1331630968733837, 'alpha': 0.6447194607285728, 'critic_loss': 254.16124636109112, 'actor_loss': 69.46289265504358, 'time_step': 0.0244957355030796, 'td_error': 51.352400542130326, 'init_value': -95.67488098144531, 'ave_value': -62.26717512955671} step=14706
2022-04-20 15:39.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.40 [info     ] CQL_20220420153308: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000363315755163717, 'time_algorithm_update': 0.023975801746747648, 'temp_loss': 0.14445028797184167, 'temp': 0.45164710492418525, 'alpha_loss': 2.0539711395725173, 'alpha': 0.625089601989378, 'critic_loss': 256.06726373148246, 'actor_loss': 69.59410899424414, 'time_step': 0.024431547226264463, 'td_error': 50.18976971786467, 'init_value': -96.14530181884766, 'ave_value': -62.4576841643117} step=15048
2022-04-20 15:39.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.49 [info     ] CQL_20220420153308: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00036264302437765555, 'time_algorithm_update': 0.023851198062562105, 'temp_loss': 0.1297584317314128, 'temp': 0.4456120978670511, 'alpha_loss': 2.035665473895289, 'alpha': 0.6060424408717462, 'critic_loss': 257.1219204908226, 'actor_loss': 69.59668511953967, 'time_step': 0.024308641751607258, 'td_error': 50.032285611200386, 'init_value': -96.52616882324219, 'ave_value': -62.98830243733686} step=15390
2022-04-20 15:39.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:39.57 [info     ] CQL_20220420153308: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003631916659617285, 'time_algorithm_update': 0.023753394160354345, 'temp_loss': 0.10863117501139641, 'temp': 0.4398538492924986, 'alpha_loss': 1.9910348644207794, 'alpha': 0.5875099505248823, 'critic_loss': 257.5882088733695, 'actor_loss': 69.70064845838044, 'time_step': 0.0242096429679826, 'td_error': 50.41817121149592, 'init_value': -95.75830078125, 'ave_value': -62.594556608128926} step=15732
2022-04-20 15:39.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.06 [info     ] CQL_20220420153308: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035328404945239686, 'time_algorithm_update': 0.02397500701815064, 'temp_loss': 0.11424660909517903, 'temp': 0.43397358896439536, 'alpha_loss': 1.928931983242258, 'alpha': 0.5698726402040113, 'critic_loss': 257.5918096798902, 'actor_loss': 69.76972964772007, 'time_step': 0.02442283518830238, 'td_error': 49.770038059780305, 'init_value': -95.68217468261719, 'ave_value': -63.27355393550552} step=16074
2022-04-20 15:40.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.15 [info     ] CQL_20220420153308: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.000363497009054262, 'time_algorithm_update': 0.023984923000224152, 'temp_loss': 0.08775002969025869, 'temp': 0.42878104066639616, 'alpha_loss': 1.8909174920174114, 'alpha': 0.5525153249676464, 'critic_loss': 257.9945478829724, 'actor_loss': 69.77832740649842, 'time_step': 0.02443984656306038, 'td_error': 49.095105770090576, 'init_value': -95.3621826171875, 'ave_value': -63.11240480788522} step=16416
2022-04-20 15:40.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.23 [info     ] CQL_20220420153308: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00035948850955182346, 'time_algorithm_update': 0.023787080195912143, 'temp_loss': 0.07920209968419016, 'temp': 0.4238641069472185, 'alpha_loss': 1.828130532252161, 'alpha': 0.5359861259920555, 'critic_loss': 258.0291829248618, 'actor_loss': 69.77434219672666, 'time_step': 0.02423797992237827, 'td_error': 49.46578771576133, 'init_value': -95.48771667480469, 'ave_value': -63.26759346320822} step=16758
2022-04-20 15:40.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:40.32 [info     ] CQL_20220420153308: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003670000890542192, 'time_algorithm_update': 0.023980076550043118, 'temp_loss': 0.07706250157751883, 'temp': 0.41923299658368207, 'alpha_loss': 1.760301338737471, 'alpha': 0.5199056813242839, 'critic_loss': 257.7269808228253, 'actor_loss': 69.82527613500406, 'time_step': 0.024441056084214596, 'td_error': 48.47699318963063, 'init_value': -94.61043548583984, 'ave_value': -63.27411146468273} step=17100
2022-04-20 15:40.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420153308/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:40.33 [info     ] FQE_20220420154032: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015236239835440395, 'time_algorithm_update': 0.003528768757739699, 'loss': 0.0077791849434577735, 'time_step': 0.003748926771692483, 'init_value': -0.48128795623779297, 'ave_value': -0.4382443096589398, 'soft_opc': nan} step=166




2022-04-20 15:40.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.34 [info     ] FQE_20220420154032: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015249453395246024, 'time_algorithm_update': 0.0035606938672352985, 'loss': 0.006324938495632216, 'time_step': 0.003783161381641066, 'init_value': -0.637610673904419, 'ave_value': -0.5430524964590331, 'soft_opc': nan} step=332




2022-04-20 15:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.34 [info     ] FQE_20220420154032: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001567286181162639, 'time_algorithm_update': 0.003472107002534062, 'loss': 0.005934946250215352, 'time_step': 0.0036983317639454304, 'init_value': -0.734408974647522, 'ave_value': -0.608424014134987, 'soft_opc': nan} step=498




2022-04-20 15:40.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.35 [info     ] FQE_20220420154032: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015106402247785087, 'time_algorithm_update': 0.003533855978264866, 'loss': 0.0060015550663088816, 'time_step': 0.003751553684832102, 'init_value': -0.8094945549964905, 'ave_value': -0.6516452543101869, 'soft_opc': nan} step=664




2022-04-20 15:40.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.36 [info     ] FQE_20220420154032: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015181662088417145, 'time_algorithm_update': 0.003435358943709408, 'loss': 0.0059010532154443575, 'time_step': 0.003657573676971068, 'init_value': -0.877116322517395, 'ave_value': -0.6825027964740723, 'soft_opc': nan} step=830




2022-04-20 15:40.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.37 [info     ] FQE_20220420154032: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001534137381128518, 'time_algorithm_update': 0.003565844283046493, 'loss': 0.0055666994738269105, 'time_step': 0.0037874127008828773, 'init_value': -0.9036257863044739, 'ave_value': -0.6951355024098276, 'soft_opc': nan} step=996




2022-04-20 15:40.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.37 [info     ] FQE_20220420154032: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001543631036597562, 'time_algorithm_update': 0.00350656279598374, 'loss': 0.0054888679710474895, 'time_step': 0.0037311617150364153, 'init_value': -0.9537879824638367, 'ave_value': -0.7288531708422008, 'soft_opc': nan} step=1162




2022-04-20 15:40.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.38 [info     ] FQE_20220420154032: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015583957534238515, 'time_algorithm_update': 0.0036031855158058994, 'loss': 0.005251576812623675, 'time_step': 0.0038335553134780332, 'init_value': -1.026475191116333, 'ave_value': -0.7641541246089849, 'soft_opc': nan} step=1328




2022-04-20 15:40.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.39 [info     ] FQE_20220420154032: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015534837561917593, 'time_algorithm_update': 0.003445592271276267, 'loss': 0.005105527380133519, 'time_step': 0.0036720942301922536, 'init_value': -0.9983825087547302, 'ave_value': -0.7487642484339507, 'soft_opc': nan} step=1494




2022-04-20 15:40.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.39 [info     ] FQE_20220420154032: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015352720237640013, 'time_algorithm_update': 0.0034954533519515074, 'loss': 0.004861309720450137, 'time_step': 0.0037172530070844903, 'init_value': -1.0653877258300781, 'ave_value': -0.7931966641129137, 'soft_opc': nan} step=1660




2022-04-20 15:40.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.40 [info     ] FQE_20220420154032: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015269130109304404, 'time_algorithm_update': 0.003434464155909527, 'loss': 0.0046063755612248125, 'time_step': 0.0036558200077838206, 'init_value': -1.1191489696502686, 'ave_value': -0.8397921291557519, 'soft_opc': nan} step=1826




2022-04-20 15:40.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.41 [info     ] FQE_20220420154032: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015627619731857116, 'time_algorithm_update': 0.0035041771739362233, 'loss': 0.004387234086563519, 'time_step': 0.00372896567884698, 'init_value': -1.1288946866989136, 'ave_value': -0.8388965412571624, 'soft_opc': nan} step=1992




2022-04-20 15:40.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.41 [info     ] FQE_20220420154032: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001504866473646049, 'time_algorithm_update': 0.0025778678526361303, 'loss': 0.00462015349970547, 'time_step': 0.0027967605246118753, 'init_value': -1.233465313911438, 'ave_value': -0.9165273904263437, 'soft_opc': nan} step=2158




2022-04-20 15:40.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.42 [info     ] FQE_20220420154032: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016097850110157426, 'time_algorithm_update': 0.004543700850153544, 'loss': 0.004468692216253559, 'time_step': 0.004774758614689471, 'init_value': -1.2891491651535034, 'ave_value': -0.9549696721069447, 'soft_opc': nan} step=2324




2022-04-20 15:40.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.43 [info     ] FQE_20220420154032: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015362774033144297, 'time_algorithm_update': 0.004463886640158044, 'loss': 0.004520027149266687, 'time_step': 0.004684511437473527, 'init_value': -1.2761253118515015, 'ave_value': -0.9361378792035687, 'soft_opc': nan} step=2490




2022-04-20 15:40.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.44 [info     ] FQE_20220420154032: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015600905360945737, 'time_algorithm_update': 0.004498748894197395, 'loss': 0.004737918775006904, 'time_step': 0.00472759482372238, 'init_value': -1.417276382446289, 'ave_value': -1.0602826633297646, 'soft_opc': nan} step=2656




2022-04-20 15:40.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.45 [info     ] FQE_20220420154032: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015383456126753106, 'time_algorithm_update': 0.0038830228598721057, 'loss': 0.00466460574219138, 'time_step': 0.004109236131231469, 'init_value': -1.4562394618988037, 'ave_value': -1.0727587563497527, 'soft_opc': nan} step=2822




2022-04-20 15:40.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.45 [info     ] FQE_20220420154032: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015774692397519765, 'time_algorithm_update': 0.004578237073967256, 'loss': 0.004874629667730082, 'time_step': 0.00481328045029238, 'init_value': -1.5099725723266602, 'ave_value': -1.1052388530593735, 'soft_opc': nan} step=2988




2022-04-20 15:40.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.46 [info     ] FQE_20220420154032: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015581659523837538, 'time_algorithm_update': 0.004506441484014672, 'loss': 0.004948027966353279, 'time_step': 0.004734108246952654, 'init_value': -1.551106572151184, 'ave_value': -1.1237858623668955, 'soft_opc': nan} step=3154




2022-04-20 15:40.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.47 [info     ] FQE_20220420154032: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015239255974091678, 'time_algorithm_update': 0.0037375645465161427, 'loss': 0.005180832427780491, 'time_step': 0.003959171743278044, 'init_value': -1.604085922241211, 'ave_value': -1.18269126103805, 'soft_opc': nan} step=3320




2022-04-20 15:40.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.48 [info     ] FQE_20220420154032: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015561551932828972, 'time_algorithm_update': 0.004532262503382671, 'loss': 0.005508205720156162, 'time_step': 0.00476102369377412, 'init_value': -1.6344250440597534, 'ave_value': -1.1947789386586025, 'soft_opc': nan} step=3486




2022-04-20 15:40.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.49 [info     ] FQE_20220420154032: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000154848558356963, 'time_algorithm_update': 0.004476403615560876, 'loss': 0.005830577981422644, 'time_step': 0.00470332352511854, 'init_value': -1.643035888671875, 'ave_value': -1.1813978322253034, 'soft_opc': nan} step=3652




2022-04-20 15:40.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.50 [info     ] FQE_20220420154032: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001581275319478598, 'time_algorithm_update': 0.004462369953293398, 'loss': 0.0060570223940009, 'time_step': 0.004694786416478904, 'init_value': -1.708022117614746, 'ave_value': -1.2573001733100093, 'soft_opc': nan} step=3818




2022-04-20 15:40.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.50 [info     ] FQE_20220420154032: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015573903738734233, 'time_algorithm_update': 0.0036804202091263003, 'loss': 0.006052584635621184, 'time_step': 0.003905269036810082, 'init_value': -1.6996726989746094, 'ave_value': -1.246064226852881, 'soft_opc': nan} step=3984




2022-04-20 15:40.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.51 [info     ] FQE_20220420154032: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015553939773375728, 'time_algorithm_update': 0.004490562232143907, 'loss': 0.006787095177520336, 'time_step': 0.004715632243328784, 'init_value': -1.8115105628967285, 'ave_value': -1.3273854947506307, 'soft_opc': nan} step=4150




2022-04-20 15:40.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.52 [info     ] FQE_20220420154032: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015466184501188347, 'time_algorithm_update': 0.004504397691014302, 'loss': 0.006784042574224028, 'time_step': 0.004729954593152885, 'init_value': -1.8631110191345215, 'ave_value': -1.387162944137513, 'soft_opc': nan} step=4316




2022-04-20 15:40.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.53 [info     ] FQE_20220420154032: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015513293714408414, 'time_algorithm_update': 0.0038782185818775593, 'loss': 0.007245598710142076, 'time_step': 0.004107729498162327, 'init_value': -1.848358392715454, 'ave_value': -1.3690642429968796, 'soft_opc': nan} step=4482




2022-04-20 15:40.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.54 [info     ] FQE_20220420154032: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001562445996755577, 'time_algorithm_update': 0.004479435553033668, 'loss': 0.007548942305680634, 'time_step': 0.004706906985087567, 'init_value': -1.9475085735321045, 'ave_value': -1.442991827435053, 'soft_opc': nan} step=4648




2022-04-20 15:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.54 [info     ] FQE_20220420154032: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016216772148408085, 'time_algorithm_update': 0.004505010972540063, 'loss': 0.007959448862876967, 'time_step': 0.004741155957601157, 'init_value': -1.9858721494674683, 'ave_value': -1.505052331251067, 'soft_opc': nan} step=4814




2022-04-20 15:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.55 [info     ] FQE_20220420154032: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016490379011774637, 'time_algorithm_update': 0.004478878285511431, 'loss': 0.008101313215040838, 'time_step': 0.004719028989952731, 'init_value': -2.0013413429260254, 'ave_value': -1.5218850988794017, 'soft_opc': nan} step=4980




2022-04-20 15:40.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.56 [info     ] FQE_20220420154032: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001551128295530756, 'time_algorithm_update': 0.003669478807104639, 'loss': 0.008754499182273376, 'time_step': 0.0038964906370783426, 'init_value': -2.0000624656677246, 'ave_value': -1.5292281118599145, 'soft_opc': nan} step=5146




2022-04-20 15:40.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.57 [info     ] FQE_20220420154032: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.000152857906847115, 'time_algorithm_update': 0.004507722624813218, 'loss': 0.009015474921507559, 'time_step': 0.004732012748718262, 'init_value': -2.0276947021484375, 'ave_value': -1.5351941991899465, 'soft_opc': nan} step=5312




2022-04-20 15:40.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.58 [info     ] FQE_20220420154032: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015606650386948184, 'time_algorithm_update': 0.004555163613284926, 'loss': 0.009088321587530873, 'time_step': 0.004782798778579895, 'init_value': -2.0095505714416504, 'ave_value': -1.5093307229983914, 'soft_opc': nan} step=5478




2022-04-20 15:40.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.59 [info     ] FQE_20220420154032: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015545034983071936, 'time_algorithm_update': 0.003833444721727486, 'loss': 0.009820454378741378, 'time_step': 0.0040608227971088455, 'init_value': -2.1201224327087402, 'ave_value': -1.5954849237693591, 'soft_opc': nan} step=5644




2022-04-20 15:40.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:40.59 [info     ] FQE_20220420154032: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001589160367666957, 'time_algorithm_update': 0.004430562616830848, 'loss': 0.010046609863919398, 'time_step': 0.004663987332079784, 'init_value': -2.139456272125244, 'ave_value': -1.6229391217651266, 'soft_opc': nan} step=5810




2022-04-20 15:40.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.00 [info     ] FQE_20220420154032: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015658068369670087, 'time_algorithm_update': 0.00451805792659162, 'loss': 0.010545409457051161, 'time_step': 0.0047479839210050655, 'init_value': -2.154360771179199, 'ave_value': -1.6187646582789785, 'soft_opc': nan} step=5976




2022-04-20 15:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.01 [info     ] FQE_20220420154032: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015586399170289557, 'time_algorithm_update': 0.0044701645173222185, 'loss': 0.011322887077877096, 'time_step': 0.004698214760745864, 'init_value': -2.20400333404541, 'ave_value': -1.6404225239849037, 'soft_opc': nan} step=6142




2022-04-20 15:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.02 [info     ] FQE_20220420154032: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015433437852974398, 'time_algorithm_update': 0.0036314970039459595, 'loss': 0.011778786452346852, 'time_step': 0.0038545519472604774, 'init_value': -2.2830817699432373, 'ave_value': -1.7518369517884813, 'soft_opc': nan} step=6308




2022-04-20 15:41.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.03 [info     ] FQE_20220420154032: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015696272792586362, 'time_algorithm_update': 0.00450669426515878, 'loss': 0.01215138529702258, 'time_step': 0.004737276628793004, 'init_value': -2.3933780193328857, 'ave_value': -1.8156114091710733, 'soft_opc': nan} step=6474




2022-04-20 15:41.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.04 [info     ] FQE_20220420154032: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015568158712731786, 'time_algorithm_update': 0.004566273057317159, 'loss': 0.012494932710759746, 'time_step': 0.004790260131100574, 'init_value': -2.5011236667633057, 'ave_value': -1.8696084032305056, 'soft_opc': nan} step=6640




2022-04-20 15:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.04 [info     ] FQE_20220420154032: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015717529388795416, 'time_algorithm_update': 0.0038702588483511685, 'loss': 0.013285259869796819, 'time_step': 0.004099629011498876, 'init_value': -2.5688862800598145, 'ave_value': -1.9439256017423563, 'soft_opc': nan} step=6806




2022-04-20 15:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.05 [info     ] FQE_20220420154032: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015789485839476068, 'time_algorithm_update': 0.004440793071884707, 'loss': 0.014213939248556158, 'time_step': 0.004670065569590373, 'init_value': -2.6350619792938232, 'ave_value': -1.956916188294286, 'soft_opc': nan} step=6972




2022-04-20 15:41.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.06 [info     ] FQE_20220420154032: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015737636979803982, 'time_algorithm_update': 0.004542063517742847, 'loss': 0.015116270970278251, 'time_step': 0.00477358232061547, 'init_value': -2.705496311187744, 'ave_value': -2.0256506920867676, 'soft_opc': nan} step=7138




2022-04-20 15:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.07 [info     ] FQE_20220420154032: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015658068369670087, 'time_algorithm_update': 0.004576250731226909, 'loss': 0.015324070105043027, 'time_step': 0.004806350512676929, 'init_value': -2.806326150894165, 'ave_value': -2.1272968357294664, 'soft_opc': nan} step=7304




2022-04-20 15:41.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.08 [info     ] FQE_20220420154032: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016451312834957996, 'time_algorithm_update': 0.003691114575029856, 'loss': 0.01632075291968523, 'time_step': 0.003927914493055229, 'init_value': -2.7649080753326416, 'ave_value': -2.0457574851005464, 'soft_opc': nan} step=7470




2022-04-20 15:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.08 [info     ] FQE_20220420154032: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015311499676072453, 'time_algorithm_update': 0.004474161619163421, 'loss': 0.01639826311617641, 'time_step': 0.004698761974472597, 'init_value': -2.8986902236938477, 'ave_value': -2.1248478213635584, 'soft_opc': nan} step=7636




2022-04-20 15:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.09 [info     ] FQE_20220420154032: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001590137022087373, 'time_algorithm_update': 0.004534141126885472, 'loss': 0.017196665358791375, 'time_step': 0.004767867455999535, 'init_value': -2.953484058380127, 'ave_value': -2.17616404133035, 'soft_opc': nan} step=7802




2022-04-20 15:41.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.10 [info     ] FQE_20220420154032: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015548625624323465, 'time_algorithm_update': 0.0038892361054937525, 'loss': 0.017590795284563518, 'time_step': 0.004114712577268302, 'init_value': -2.991715669631958, 'ave_value': -2.151536100766378, 'soft_opc': nan} step=7968




2022-04-20 15:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.11 [info     ] FQE_20220420154032: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001569253852568477, 'time_algorithm_update': 0.004418223737234093, 'loss': 0.018153926777703888, 'time_step': 0.004646891570953001, 'init_value': -3.090238571166992, 'ave_value': -2.2118337784960933, 'soft_opc': nan} step=8134




2022-04-20 15:41.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:41.12 [info     ] FQE_20220420154032: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015706613839390767, 'time_algorithm_update': 0.004546941044818924, 'loss': 0.018459989089681487, 'time_step': 0.004776921616979392, 'init_value': -3.0722639560699463, 'ave_value': -2.176739057485719, 'soft_opc': nan} step=8300




2022-04-20 15:41.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154032/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 15:41.12 [info     ] Directory is created at d3rlpy_logs/FQE_20220420154112
2022-04-20 15:41.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:41.12 [debug    ] Building models...
2022-04-20 15:41.12 [debug    ] Models have been built.
2022-04-20 15:41.12 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420154112/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:41.14 [info     ] FQE_20220420154112: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001572931316536917, 'time_algorithm_update': 0.004055788147617393, 'loss': 0.022147840788771567, 'time_step': 0.004284487307911188, 'init_value': -1.164133071899414, 'ave_value': -1.1758283823460072, 'soft_opc': nan} step=355




2022-04-20 15:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.15 [info     ] FQE_20220420154112: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00015828777366960552, 'time_algorithm_update': 0.004466098463031608, 'loss': 0.02101769180260074, 'time_step': 0.0046936606017636585, 'init_value': -2.3481199741363525, 'ave_value': -2.385022846827022, 'soft_opc': nan} step=710




2022-04-20 15:41.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.17 [info     ] FQE_20220420154112: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00015702717740770798, 'time_algorithm_update': 0.004182561014739561, 'loss': 0.022595627500977315, 'time_step': 0.004409976072714363, 'init_value': -2.9600138664245605, 'ave_value': -3.008944941564263, 'soft_opc': nan} step=1065




2022-04-20 15:41.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.19 [info     ] FQE_20220420154112: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016193927173883144, 'time_algorithm_update': 0.0040001036415637375, 'loss': 0.027824051814599775, 'time_step': 0.00423182702400315, 'init_value': -3.9471700191497803, 'ave_value': -4.046793335438388, 'soft_opc': nan} step=1420




2022-04-20 15:41.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.21 [info     ] FQE_20220420154112: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016155713041063766, 'time_algorithm_update': 0.004511098458733358, 'loss': 0.03363957783128594, 'time_step': 0.004746009934116417, 'init_value': -4.5681352615356445, 'ave_value': -4.721255138758067, 'soft_opc': nan} step=1775




2022-04-20 15:41.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.22 [info     ] FQE_20220420154112: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016085933631574604, 'time_algorithm_update': 0.004043754389588262, 'loss': 0.04165905671535244, 'time_step': 0.004279931834046269, 'init_value': -5.417399883270264, 'ave_value': -5.760281027851571, 'soft_opc': nan} step=2130




2022-04-20 15:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.24 [info     ] FQE_20220420154112: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.0001613811707832444, 'time_algorithm_update': 0.00452210332306338, 'loss': 0.04916848352992199, 'time_step': 0.004756648775557397, 'init_value': -5.901698112487793, 'ave_value': -6.4139561465041215, 'soft_opc': nan} step=2485




2022-04-20 15:41.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.26 [info     ] FQE_20220420154112: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016185196352676607, 'time_algorithm_update': 0.004076394900469713, 'loss': 0.06061629397191212, 'time_step': 0.0043101109249491084, 'init_value': -6.526691913604736, 'ave_value': -7.361584207327363, 'soft_opc': nan} step=2840




2022-04-20 15:41.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.27 [info     ] FQE_20220420154112: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00015770415185203016, 'time_algorithm_update': 0.004158821911878989, 'loss': 0.06856197291119417, 'time_step': 0.0043902074786978705, 'init_value': -6.621739864349365, 'ave_value': -7.771566646409004, 'soft_opc': nan} step=3195




2022-04-20 15:41.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.29 [info     ] FQE_20220420154112: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016237782760405206, 'time_algorithm_update': 0.004483325044873735, 'loss': 0.08051086589694023, 'time_step': 0.0047210955284011195, 'init_value': -6.996174335479736, 'ave_value': -8.582834704041941, 'soft_opc': nan} step=3550




2022-04-20 15:41.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.31 [info     ] FQE_20220420154112: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.0001591205596923828, 'time_algorithm_update': 0.0040679689864037746, 'loss': 0.09127807922294022, 'time_step': 0.004300110105057838, 'init_value': -7.667613983154297, 'ave_value': -9.65912586703724, 'soft_opc': nan} step=3905




2022-04-20 15:41.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.33 [info     ] FQE_20220420154112: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00015989491637323944, 'time_algorithm_update': 0.004470923920752297, 'loss': 0.10632630343693243, 'time_step': 0.004701236939766038, 'init_value': -7.681354999542236, 'ave_value': -10.103420760640766, 'soft_opc': nan} step=4260




2022-04-20 15:41.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.34 [info     ] FQE_20220420154112: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00015652213298099143, 'time_algorithm_update': 0.004005522795126472, 'loss': 0.12190923477772256, 'time_step': 0.004231301159925864, 'init_value': -8.134114265441895, 'ave_value': -10.999423878042547, 'soft_opc': nan} step=4615




2022-04-20 15:41.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.36 [info     ] FQE_20220420154112: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00016031869700257207, 'time_algorithm_update': 0.004369965405531332, 'loss': 0.140772166365469, 'time_step': 0.004598974845778774, 'init_value': -8.482793807983398, 'ave_value': -11.568450454579358, 'soft_opc': nan} step=4970




2022-04-20 15:41.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.38 [info     ] FQE_20220420154112: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016132341304295498, 'time_algorithm_update': 0.004285995053573394, 'loss': 0.1569787968868311, 'time_step': 0.004518147589455188, 'init_value': -9.1365385055542, 'ave_value': -12.490382322396703, 'soft_opc': nan} step=5325




2022-04-20 15:41.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.39 [info     ] FQE_20220420154112: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016020183831873074, 'time_algorithm_update': 0.003978945503772145, 'loss': 0.17803579807071618, 'time_step': 0.0042118804555543714, 'init_value': -9.314943313598633, 'ave_value': -12.875974654982603, 'soft_opc': nan} step=5680




2022-04-20 15:41.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.41 [info     ] FQE_20220420154112: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00015915078176579006, 'time_algorithm_update': 0.004505264926964129, 'loss': 0.19795282289490734, 'time_step': 0.0047335154573682325, 'init_value': -9.89979362487793, 'ave_value': -13.717962572151169, 'soft_opc': nan} step=6035




2022-04-20 15:41.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.43 [info     ] FQE_20220420154112: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.000159560458760866, 'time_algorithm_update': 0.004026239019044688, 'loss': 0.2198293792477376, 'time_step': 0.004258392226528114, 'init_value': -10.178354263305664, 'ave_value': -14.163522887153208, 'soft_opc': nan} step=6390




2022-04-20 15:41.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.44 [info     ] FQE_20220420154112: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00015881565255178532, 'time_algorithm_update': 0.0044959471259318606, 'loss': 0.2399721292385333, 'time_step': 0.004726270890571702, 'init_value': -10.720532417297363, 'ave_value': -14.883738618912691, 'soft_opc': nan} step=6745




2022-04-20 15:41.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.46 [info     ] FQE_20220420154112: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00015925890962842484, 'time_algorithm_update': 0.004175147204331949, 'loss': 0.26559150439332907, 'time_step': 0.004406359497929962, 'init_value': -11.267888069152832, 'ave_value': -15.550284562071012, 'soft_opc': nan} step=7100




2022-04-20 15:41.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.48 [info     ] FQE_20220420154112: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00015862088807871644, 'time_algorithm_update': 0.004024965662351796, 'loss': 0.2846714978644126, 'time_step': 0.004254875720386774, 'init_value': -11.957890510559082, 'ave_value': -16.29147913972997, 'soft_opc': nan} step=7455




2022-04-20 15:41.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.50 [info     ] FQE_20220420154112: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016399034312073612, 'time_algorithm_update': 0.004866643019125495, 'loss': 0.3174170969237744, 'time_step': 0.0051080495538845866, 'init_value': -12.611903190612793, 'ave_value': -17.014328447103193, 'soft_opc': nan} step=7810




2022-04-20 15:41.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.51 [info     ] FQE_20220420154112: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00016046107654840174, 'time_algorithm_update': 0.00401711128127407, 'loss': 0.3456873270407529, 'time_step': 0.004252067082364794, 'init_value': -13.263999938964844, 'ave_value': -17.644975198680676, 'soft_opc': nan} step=8165




2022-04-20 15:41.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.53 [info     ] FQE_20220420154112: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.0001619829258448641, 'time_algorithm_update': 0.004549411988594163, 'loss': 0.36773079893433713, 'time_step': 0.004788595521953744, 'init_value': -13.700782775878906, 'ave_value': -18.06816374841344, 'soft_opc': nan} step=8520




2022-04-20 15:41.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.55 [info     ] FQE_20220420154112: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.0001604933134267028, 'time_algorithm_update': 0.0041472482009672785, 'loss': 0.4083234611321503, 'time_step': 0.004383022012844891, 'init_value': -14.259284019470215, 'ave_value': -18.561297411378593, 'soft_opc': nan} step=8875




2022-04-20 15:41.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.56 [info     ] FQE_20220420154112: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00015990163238955215, 'time_algorithm_update': 0.004076597724162357, 'loss': 0.4383587017445497, 'time_step': 0.0043086051940917965, 'init_value': -14.996626853942871, 'ave_value': -19.38628263639207, 'soft_opc': nan} step=9230




2022-04-20 15:41.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:41.58 [info     ] FQE_20220420154112: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001647902206635811, 'time_algorithm_update': 0.0045267225990832695, 'loss': 0.4699131737922279, 'time_step': 0.00476466501262826, 'init_value': -14.926641464233398, 'ave_value': -19.298936038710742, 'soft_opc': nan} step=9585




2022-04-20 15:41.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.00 [info     ] FQE_20220420154112: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00015980290694975516, 'time_algorithm_update': 0.004017925262451172, 'loss': 0.47825849472322096, 'time_step': 0.004252316918171627, 'init_value': -15.4757719039917, 'ave_value': -19.868216847422193, 'soft_opc': nan} step=9940




2022-04-20 15:42.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.02 [info     ] FQE_20220420154112: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.0001615007158736108, 'time_algorithm_update': 0.004496818193247621, 'loss': 0.5140985082047926, 'time_step': 0.004732847885346748, 'init_value': -15.662694931030273, 'ave_value': -20.184257880693238, 'soft_opc': nan} step=10295




2022-04-20 15:42.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.03 [info     ] FQE_20220420154112: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001608237414292886, 'time_algorithm_update': 0.004086523996272557, 'loss': 0.5464238626069167, 'time_step': 0.00432063962372256, 'init_value': -16.245494842529297, 'ave_value': -20.887670451241572, 'soft_opc': nan} step=10650




2022-04-20 15:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.05 [info     ] FQE_20220420154112: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016087545475489657, 'time_algorithm_update': 0.004277240054708132, 'loss': 0.5789903049895041, 'time_step': 0.004513199228635976, 'init_value': -17.054588317871094, 'ave_value': -21.638505109335313, 'soft_opc': nan} step=11005




2022-04-20 15:42.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.07 [info     ] FQE_20220420154112: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016355111565388424, 'time_algorithm_update': 0.004430948848455725, 'loss': 0.6008683373967947, 'time_step': 0.0046685856832584865, 'init_value': -17.206758499145508, 'ave_value': -21.90610028048126, 'soft_opc': nan} step=11360




2022-04-20 15:42.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.08 [info     ] FQE_20220420154112: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016134087468536808, 'time_algorithm_update': 0.004053949302350971, 'loss': 0.623584472115191, 'time_step': 0.004288252306656098, 'init_value': -17.498132705688477, 'ave_value': -22.225216901026176, 'soft_opc': nan} step=11715




2022-04-20 15:42.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.10 [info     ] FQE_20220420154112: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00015977469968124174, 'time_algorithm_update': 0.004477159741898658, 'loss': 0.6541712511330843, 'time_step': 0.004712654167497662, 'init_value': -17.622716903686523, 'ave_value': -22.44915581591801, 'soft_opc': nan} step=12070




2022-04-20 15:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.12 [info     ] FQE_20220420154112: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00015864439413581095, 'time_algorithm_update': 0.004063457166644889, 'loss': 0.6816689884421271, 'time_step': 0.004293330958191777, 'init_value': -17.820505142211914, 'ave_value': -22.82845194583694, 'soft_opc': nan} step=12425




2022-04-20 15:42.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.14 [info     ] FQE_20220420154112: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001628707832014057, 'time_algorithm_update': 0.004356889321770466, 'loss': 0.703792142631932, 'time_step': 0.004593915670690402, 'init_value': -18.02971839904785, 'ave_value': -23.241389048107187, 'soft_opc': nan} step=12780




2022-04-20 15:42.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.15 [info     ] FQE_20220420154112: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016044898771903885, 'time_algorithm_update': 0.0042725502605169595, 'loss': 0.7381561170528892, 'time_step': 0.004506420753371548, 'init_value': -18.71783447265625, 'ave_value': -24.05458304210059, 'soft_opc': nan} step=13135




2022-04-20 15:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.17 [info     ] FQE_20220420154112: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016407362172301388, 'time_algorithm_update': 0.0040739959394428095, 'loss': 0.7605822823312081, 'time_step': 0.0043126777863838305, 'init_value': -18.640609741210938, 'ave_value': -24.26392743432537, 'soft_opc': nan} step=13490




2022-04-20 15:42.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.19 [info     ] FQE_20220420154112: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016044160010109484, 'time_algorithm_update': 0.0045099016646264305, 'loss': 0.7877860594855648, 'time_step': 0.004745289305566063, 'init_value': -18.777494430541992, 'ave_value': -24.347748814782122, 'soft_opc': nan} step=13845




2022-04-20 15:42.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.20 [info     ] FQE_20220420154112: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00015833075617400694, 'time_algorithm_update': 0.004112156344131685, 'loss': 0.8084244112103758, 'time_step': 0.004343731302610585, 'init_value': -18.634464263916016, 'ave_value': -24.565143163962485, 'soft_opc': nan} step=14200




2022-04-20 15:42.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.22 [info     ] FQE_20220420154112: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016273579127352002, 'time_algorithm_update': 0.004461031900325292, 'loss': 0.8295223652476996, 'time_step': 0.004698353753962987, 'init_value': -17.996719360351562, 'ave_value': -24.269080518625266, 'soft_opc': nan} step=14555




2022-04-20 15:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.24 [info     ] FQE_20220420154112: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016288220042913733, 'time_algorithm_update': 0.004234084948687486, 'loss': 0.8432686206005828, 'time_step': 0.004471223455079844, 'init_value': -18.742048263549805, 'ave_value': -25.08449730697439, 'soft_opc': nan} step=14910




2022-04-20 15:42.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.25 [info     ] FQE_20220420154112: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00015841604958117847, 'time_algorithm_update': 0.00411439277756382, 'loss': 0.8627946598586482, 'time_step': 0.004348118204466054, 'init_value': -18.10987663269043, 'ave_value': -24.694036526688432, 'soft_opc': nan} step=15265




2022-04-20 15:42.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.27 [info     ] FQE_20220420154112: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00016141004965338908, 'time_algorithm_update': 0.004470202620600311, 'loss': 0.885107129376749, 'time_step': 0.004707159122950594, 'init_value': -18.03306007385254, 'ave_value': -24.823836590194333, 'soft_opc': nan} step=15620




2022-04-20 15:42.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.29 [info     ] FQE_20220420154112: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016080359338035047, 'time_algorithm_update': 0.004091460268262406, 'loss': 0.8923990946368012, 'time_step': 0.004324565135257345, 'init_value': -18.401784896850586, 'ave_value': -25.218212363979223, 'soft_opc': nan} step=15975




2022-04-20 15:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.31 [info     ] FQE_20220420154112: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00016083918826680787, 'time_algorithm_update': 0.004528466076918051, 'loss': 0.9204019630511462, 'time_step': 0.004766326555064026, 'init_value': -18.69255256652832, 'ave_value': -25.515864755794347, 'soft_opc': nan} step=16330




2022-04-20 15:42.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.32 [info     ] FQE_20220420154112: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016157123404489436, 'time_algorithm_update': 0.004153290600843833, 'loss': 0.9395402054026932, 'time_step': 0.004388196703413842, 'init_value': -18.586523056030273, 'ave_value': -25.669957602269555, 'soft_opc': nan} step=16685




2022-04-20 15:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.34 [info     ] FQE_20220420154112: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.0001595436687200842, 'time_algorithm_update': 0.0041737469149307465, 'loss': 0.9577202916250263, 'time_step': 0.0044057375948194045, 'init_value': -18.833799362182617, 'ave_value': -25.9709511955023, 'soft_opc': nan} step=17040




2022-04-20 15:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.36 [info     ] FQE_20220420154112: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016192248169804964, 'time_algorithm_update': 0.004511525597370846, 'loss': 0.9777180288826496, 'time_step': 0.004750249755214637, 'init_value': -18.39996337890625, 'ave_value': -25.792837757261488, 'soft_opc': nan} step=17395




2022-04-20 15:42.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 15:42.37 [info     ] FQE_20220420154112: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016313203623597052, 'time_algorithm_update': 0.004098326723340532, 'loss': 0.9982835920448874, 'time_step': 0.0043357439444098676, 'init_value': -18.381345748901367, 'ave_value': -25.96684782535816, 'soft_opc': nan} step=17750




2022-04-20 15:42.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420154112/model_17750.pt
search iteration:  2
using hyper params:  [0.008096776650201774, 0.0028515353905198488, 3.4445390341283954e-05, 5]
2022-04-20 15:42.38 [debug    ] RoundIterator is selected.
2022-04-20 15:42.38 [info     ] Directory is created at d3rlpy_logs/CQL_20220420154238
2022-04-20 15:42.38 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:42.38 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:42.38 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420154238/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.008096776650201774, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.47 [info     ] CQL_20220420154238: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003593393236572979, 'time_algorithm_update': 0.025439981131525766, 'temp_loss': 4.569655593375714, 'temp': 0.994502309818714, 'alpha_loss': -16.941435646592524, 'alpha': 1.0173755835371407, 'critic_loss': 48.67130632567824, 'actor_loss': 3.151781189511394, 'time_step': 0.02589714945408336, 'td_error': 2.7029039596290545, 'init_value': -8.56751823425293, 'ave_value': -5.491705880620861} step=342
2022-04-20 15:42.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:42.56 [info     ] CQL_20220420154238: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036428337208708827, 'time_algorithm_update': 0.025147842384918392, 'temp_loss': 4.275925103683917, 'temp': 0.9832013289482273, 'alpha_loss': -9.343316131167942, 'alpha': 1.044903624824613, 'critic_loss': 25.169623703984488, 'actor_loss': 7.5663174091026795, 'time_step': 0.025606225805672986, 'td_error': 4.243181565867523, 'init_value': -15.886909484863281, 'ave_value': -9.527229840953474} step=684
2022-04-20 15:42.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.05 [info     ] CQL_20220420154238: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00036337640550401475, 'time_algorithm_update': 0.02558634016248915, 'temp_loss': 3.7603567246108027, 'temp': 0.972715016171249, 'alpha_loss': -6.340278792799565, 'alpha': 1.0660170529320923, 'critic_loss': 30.832199336492526, 'actor_loss': 13.175124561577512, 'time_step': 0.026047638982360125, 'td_error': 5.8893076563543145, 'init_value': -23.67325210571289, 'ave_value': -14.007610597029194} step=1026
2022-04-20 15:43.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.14 [info     ] CQL_20220420154238: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00036037735074584247, 'time_algorithm_update': 0.024744112589205915, 'temp_loss': 3.38985882795345, 'temp': 0.9628324465096345, 'alpha_loss': -4.449216924215618, 'alpha': 1.0847058996819614, 'critic_loss': 41.45353786847745, 'actor_loss': 18.74990155125222, 'time_step': 0.025197869155839173, 'td_error': 8.111062218990549, 'init_value': -31.944156646728516, 'ave_value': -18.915611365449926} step=1368
2022-04-20 15:43.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.23 [info     ] CQL_20220420154238: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003351182268376936, 'time_algorithm_update': 0.02241702177371198, 'temp_loss': 3.0679384748838103, 'temp': 0.9533803682578238, 'alpha_loss': -2.781830364849135, 'alpha': 1.1001758631209881, 'critic_loss': 55.323128281978136, 'actor_loss': 24.040553249113742, 'time_step': 0.02284185719071773, 'td_error': 11.425381059012613, 'init_value': -39.93975067138672, 'ave_value': -23.194174384860037} step=1710
2022-04-20 15:43.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.31 [info     ] CQL_20220420154238: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.000367161126164665, 'time_algorithm_update': 0.023921201103611997, 'temp_loss': 2.7661066501461273, 'temp': 0.9442802568276724, 'alpha_loss': -1.1459938260830111, 'alpha': 1.1105649227287337, 'critic_loss': 72.0838482282315, 'actor_loss': 29.098706970437924, 'time_step': 0.02438607550503915, 'td_error': 14.298187933089379, 'init_value': -46.934104919433594, 'ave_value': -27.225678128946473} step=2052
2022-04-20 15:43.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.40 [info     ] CQL_20220420154238: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003581925442344264, 'time_algorithm_update': 0.02377381101686355, 'temp_loss': 2.4635814226161665, 'temp': 0.9356112940269604, 'alpha_loss': 0.5017144743541027, 'alpha': 1.1123714614332767, 'critic_loss': 90.74148568493581, 'actor_loss': 33.86343204765989, 'time_step': 0.024224520426744608, 'td_error': 18.538260133714893, 'init_value': -53.56208419799805, 'ave_value': -30.931233287230135} step=2394
2022-04-20 15:43.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.49 [info     ] CQL_20220420154238: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00035569263480560127, 'time_algorithm_update': 0.023815670208624233, 'temp_loss': 2.215211868634698, 'temp': 0.9272812620589608, 'alpha_loss': 1.9943541653225558, 'alpha': 1.1024853029446295, 'critic_loss': 110.37052170156736, 'actor_loss': 38.45203447063067, 'time_step': 0.02426659712317394, 'td_error': 22.222269431633794, 'init_value': -59.886962890625, 'ave_value': -34.55263131626152} step=2736
2022-04-20 15:43.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:43.57 [info     ] CQL_20220420154238: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003661042765567177, 'time_algorithm_update': 0.023845151153921385, 'temp_loss': 1.9665300887927675, 'temp': 0.9192785104812934, 'alpha_loss': 3.3854760944581868, 'alpha': 1.0796140024536534, 'critic_loss': 130.4803215606868, 'actor_loss': 42.7776592433104, 'time_step': 0.024306477161875943, 'td_error': 26.339033065313544, 'init_value': -67.88182067871094, 'ave_value': -39.380255668225054} step=3078
2022-04-20 15:43.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.06 [info     ] CQL_20220420154238: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003603153061448482, 'time_algorithm_update': 0.02362879744747229, 'temp_loss': 1.7536879512998793, 'temp': 0.9115908768093377, 'alpha_loss': 4.764611235836096, 'alpha': 1.0438941887944762, 'critic_loss': 150.78834056296543, 'actor_loss': 47.003702799479164, 'time_step': 0.02408850471875821, 'td_error': 31.967692352566377, 'init_value': -74.30799102783203, 'ave_value': -43.27322502838733} step=3420
2022-04-20 15:44.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.15 [info     ] CQL_20220420154238: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003598572915060478, 'time_algorithm_update': 0.023840856830976163, 'temp_loss': 1.5917552732236204, 'temp': 0.9040807201499828, 'alpha_loss': 5.596754511197408, 'alpha': 1.0025782752455326, 'critic_loss': 173.72632509644268, 'actor_loss': 50.872043074223036, 'time_step': 0.02429574135451289, 'td_error': 32.99223112481745, 'init_value': -78.44718933105469, 'ave_value': -45.92914605612153} step=3762
2022-04-20 15:44.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.23 [info     ] CQL_20220420154238: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00036409444976271245, 'time_algorithm_update': 0.023824441502665914, 'temp_loss': 1.4068121615557643, 'temp': 0.8968121629012259, 'alpha_loss': 6.16532576223563, 'alpha': 0.9613255394829644, 'critic_loss': 196.49535771420128, 'actor_loss': 54.49703345940127, 'time_step': 0.02428311632390608, 'td_error': 37.04667999705388, 'init_value': -82.87342834472656, 'ave_value': -48.091863215248864} step=4104
2022-04-20 15:44.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.32 [info     ] CQL_20220420154238: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003552450771220246, 'time_algorithm_update': 0.023925874665466664, 'temp_loss': 1.3124538510166415, 'temp': 0.8895703423441502, 'alpha_loss': 6.549614849146346, 'alpha': 0.9226955992785114, 'critic_loss': 220.1729805929619, 'actor_loss': 58.06763042204561, 'time_step': 0.02437725401761239, 'td_error': 40.25594870314042, 'init_value': -88.90094757080078, 'ave_value': -52.06565154828333} step=4446
2022-04-20 15:44.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.41 [info     ] CQL_20220420154238: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003632753216035185, 'time_algorithm_update': 0.023948548132913153, 'temp_loss': 1.1667695388807888, 'temp': 0.882536247285486, 'alpha_loss': 6.858157957506458, 'alpha': 0.8863069352350736, 'critic_loss': 242.93272261591682, 'actor_loss': 61.2393205876936, 'time_step': 0.024408414349918476, 'td_error': 43.513610159540875, 'init_value': -92.93717956542969, 'ave_value': -54.87695644095808} step=4788
2022-04-20 15:44.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.50 [info     ] CQL_20220420154238: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00036274550253884835, 'time_algorithm_update': 0.023852138491401897, 'temp_loss': 1.0313500768957082, 'temp': 0.8756024593498275, 'alpha_loss': 7.085014935822515, 'alpha': 0.8522595357128054, 'critic_loss': 266.7760044120209, 'actor_loss': 64.23902858488741, 'time_step': 0.024311697971053987, 'td_error': 45.56847398072249, 'init_value': -97.91996765136719, 'ave_value': -57.95107719678331} step=5130
2022-04-20 15:44.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:44.58 [info     ] CQL_20220420154238: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003660248036970172, 'time_algorithm_update': 0.024145732846176417, 'temp_loss': 0.9294036862424069, 'temp': 0.8688956997547931, 'alpha_loss': 7.267385664041976, 'alpha': 0.8205040348203558, 'critic_loss': 289.8785558332477, 'actor_loss': 67.09205665365297, 'time_step': 0.024610313058596605, 'td_error': 47.065320267318356, 'init_value': -101.2090835571289, 'ave_value': -60.272935907875635} step=5472
2022-04-20 15:44.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.07 [info     ] CQL_20220420154238: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003637186965050056, 'time_algorithm_update': 0.024017702069198878, 'temp_loss': 0.8045259125051443, 'temp': 0.8623912264729103, 'alpha_loss': 7.345651088402286, 'alpha': 0.7905008771963287, 'critic_loss': 311.94614869948714, 'actor_loss': 69.73945732562862, 'time_step': 0.024476889281245005, 'td_error': 49.52351117502067, 'init_value': -105.55789947509766, 'ave_value': -63.098009364737855} step=5814
2022-04-20 15:45.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.16 [info     ] CQL_20220420154238: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003617409377070198, 'time_algorithm_update': 0.024091096649393004, 'temp_loss': 0.7496991568076158, 'temp': 0.8561384301436575, 'alpha_loss': 7.487141959151329, 'alpha': 0.7618794005516677, 'critic_loss': 333.4712556537829, 'actor_loss': 72.30439445986386, 'time_step': 0.024549845366450083, 'td_error': 50.80218537839294, 'init_value': -109.47003173828125, 'ave_value': -64.42031365288807} step=6156
2022-04-20 15:45.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.25 [info     ] CQL_20220420154238: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035926263931899044, 'time_algorithm_update': 0.02392094804529558, 'temp_loss': 0.6554001605040148, 'temp': 0.849794000735757, 'alpha_loss': 7.440488468136704, 'alpha': 0.7351095761820586, 'critic_loss': 355.943309315464, 'actor_loss': 74.8486045145849, 'time_step': 0.024373631031192534, 'td_error': 52.658488800465676, 'init_value': -113.3971176147461, 'ave_value': -67.11289300533163} step=6498
2022-04-20 15:45.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.33 [info     ] CQL_20220420154238: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003611867190801609, 'time_algorithm_update': 0.023966572438067164, 'temp_loss': 0.5411871051067236, 'temp': 0.8440944838245013, 'alpha_loss': 7.553568533289503, 'alpha': 0.709566912281583, 'critic_loss': 376.27748429147823, 'actor_loss': 77.03817952027795, 'time_step': 0.024423500250654612, 'td_error': 56.47709023468935, 'init_value': -114.42752838134766, 'ave_value': -68.82890360364371} step=6840
2022-04-20 15:45.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.42 [info     ] CQL_20220420154238: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00036012150390803463, 'time_algorithm_update': 0.024242707860400104, 'temp_loss': 0.4662967094911905, 'temp': 0.8385566378894606, 'alpha_loss': 7.613926647699367, 'alpha': 0.6848927755453433, 'critic_loss': 396.4715171055487, 'actor_loss': 79.25527690865142, 'time_step': 0.024698407329313936, 'td_error': 56.95252059206827, 'init_value': -118.29239654541016, 'ave_value': -70.0875039407611} step=7182
2022-04-20 15:45.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:45.52 [info     ] CQL_20220420154238: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003678477995576914, 'time_algorithm_update': 0.02515876293182373, 'temp_loss': 0.3616750301815003, 'temp': 0.8336004451004385, 'alpha_loss': 7.580203685147024, 'alpha': 0.6615548053680108, 'critic_loss': 415.4206085205078, 'actor_loss': 81.20834984137998, 'time_step': 0.025620759579173306, 'td_error': 59.321552934072365, 'init_value': -123.51472473144531, 'ave_value': -74.6981026809911} step=7524
2022-04-20 15:45.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.01 [info     ] CQL_20220420154238: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00036493100618061264, 'time_algorithm_update': 0.02534468132152892, 'temp_loss': 0.3222837629646324, 'temp': 0.829028559532779, 'alpha_loss': 7.452136470560442, 'alpha': 0.6392621000607809, 'critic_loss': 435.7756849143937, 'actor_loss': 83.11220193606371, 'time_step': 0.025803022217332272, 'td_error': 58.05740965239349, 'init_value': -125.10074615478516, 'ave_value': -74.80445636720002} step=7866
2022-04-20 15:46.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.10 [info     ] CQL_20220420154238: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003637535530224181, 'time_algorithm_update': 0.02517138726530019, 'temp_loss': 0.2803048730081721, 'temp': 0.8247053199692777, 'alpha_loss': 7.423752072261788, 'alpha': 0.6179207403060288, 'critic_loss': 453.85716292174936, 'actor_loss': 84.86327263904595, 'time_step': 0.025634067100391053, 'td_error': 60.21396715778058, 'init_value': -124.8772201538086, 'ave_value': -74.69413701403114} step=8208
2022-04-20 15:46.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.19 [info     ] CQL_20220420154238: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000367289398148743, 'time_algorithm_update': 0.02550638070580555, 'temp_loss': 0.20295608380253902, 'temp': 0.8208019780485254, 'alpha_loss': 7.318160756986741, 'alpha': 0.5974799173268658, 'critic_loss': 469.26200143914474, 'actor_loss': 86.26628487570244, 'time_step': 0.025972403978046617, 'td_error': 62.33055008928715, 'init_value': -127.3038101196289, 'ave_value': -76.94542391338999} step=8550
2022-04-20 15:46.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.28 [info     ] CQL_20220420154238: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003609120497229504, 'time_algorithm_update': 0.024995222426297373, 'temp_loss': 0.19122640848464784, 'temp': 0.8168837827200081, 'alpha_loss': 7.139202509707178, 'alpha': 0.5782394945970055, 'critic_loss': 486.6475096585458, 'actor_loss': 87.8117725974635, 'time_step': 0.025452240168699743, 'td_error': 64.98842152443525, 'init_value': -133.22097778320312, 'ave_value': -79.7021776968214} step=8892
2022-04-20 15:46.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.38 [info     ] CQL_20220420154238: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00036454200744628906, 'time_algorithm_update': 0.025360965589333696, 'temp_loss': 0.1247080972646935, 'temp': 0.8139807407618963, 'alpha_loss': 7.010713085096482, 'alpha': 0.5593805741845516, 'critic_loss': 500.72977353815446, 'actor_loss': 89.10799657810502, 'time_step': 0.025822152868348953, 'td_error': 61.373960614358424, 'init_value': -133.3439483642578, 'ave_value': -79.47414853842946} step=9234
2022-04-20 15:46.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.47 [info     ] CQL_20220420154238: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036698126653481646, 'time_algorithm_update': 0.025127040015326604, 'temp_loss': 0.1245770510340556, 'temp': 0.8112950199528745, 'alpha_loss': 6.820756613859657, 'alpha': 0.5415235688114723, 'critic_loss': 518.1177512721011, 'actor_loss': 90.56400598001758, 'time_step': 0.025590318685386613, 'td_error': 64.55427626685015, 'init_value': -135.84841918945312, 'ave_value': -81.86768689386614} step=9576
2022-04-20 15:46.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:46.56 [info     ] CQL_20220420154238: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003619737792433354, 'time_algorithm_update': 0.02539260485018903, 'temp_loss': 0.08005131137889554, 'temp': 0.8087335185349336, 'alpha_loss': 6.708312652264422, 'alpha': 0.5243999581587943, 'critic_loss': 532.1992300825509, 'actor_loss': 91.6428953248855, 'time_step': 0.02585105380119636, 'td_error': 64.42349996709842, 'init_value': -136.46878051757812, 'ave_value': -82.42029552855941} step=9918
2022-04-20 15:46.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.05 [info     ] CQL_20220420154238: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00036123203255279717, 'time_algorithm_update': 0.025127841018096744, 'temp_loss': 0.06254615610715938, 'temp': 0.8070276850148251, 'alpha_loss': 6.683893762833891, 'alpha': 0.5077042724305426, 'critic_loss': 546.3850798132823, 'actor_loss': 92.90678303144132, 'time_step': 0.025582867756224516, 'td_error': 65.12832867147712, 'init_value': -140.5232696533203, 'ave_value': -84.36639631488033} step=10260
2022-04-20 15:47.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.15 [info     ] CQL_20220420154238: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003657940535517464, 'time_algorithm_update': 0.025390961713958205, 'temp_loss': 0.05461225793179539, 'temp': 0.8052597187067333, 'alpha_loss': 6.455684568449768, 'alpha': 0.4916360632717958, 'critic_loss': 560.67674451683, 'actor_loss': 94.09389667622527, 'time_step': 0.025853817923027173, 'td_error': 62.77280504724577, 'init_value': -137.9063720703125, 'ave_value': -84.31363778270244} step=10602
2022-04-20 15:47.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.24 [info     ] CQL_20220420154238: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00036605268891094723, 'time_algorithm_update': 0.02515276342804669, 'temp_loss': 0.019826573129111562, 'temp': 0.8042766281038697, 'alpha_loss': 6.385928146323265, 'alpha': 0.47601649539861063, 'critic_loss': 571.5540796469527, 'actor_loss': 95.01115698563426, 'time_step': 0.02561457882150572, 'td_error': 62.90221739326015, 'init_value': -137.15115356445312, 'ave_value': -82.5610662837773} step=10944
2022-04-20 15:47.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.33 [info     ] CQL_20220420154238: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003645712869209156, 'time_algorithm_update': 0.025337791582297164, 'temp_loss': -0.023008699697709224, 'temp': 0.8043279982449716, 'alpha_loss': 6.256318177396094, 'alpha': 0.4609731393076523, 'critic_loss': 581.7778677243239, 'actor_loss': 95.99841406749702, 'time_step': 0.025800165377165143, 'td_error': 63.7412896447085, 'init_value': -139.29312133789062, 'ave_value': -84.29726933837071} step=11286
2022-04-20 15:47.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.42 [info     ] CQL_20220420154238: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00036535834708409, 'time_algorithm_update': 0.024991914542794924, 'temp_loss': -0.05734441090614824, 'temp': 0.805237954297261, 'alpha_loss': 6.278958327588979, 'alpha': 0.4462844741623304, 'critic_loss': 589.6662735966911, 'actor_loss': 96.7768655966597, 'time_step': 0.02545364418922112, 'td_error': 64.86356725949446, 'init_value': -145.63595581054688, 'ave_value': -86.52282719868663} step=11628
2022-04-20 15:47.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:47.52 [info     ] CQL_20220420154238: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037003748598154524, 'time_algorithm_update': 0.025398423100075528, 'temp_loss': -0.11613911493240218, 'temp': 0.808481250241486, 'alpha_loss': 6.053273828406083, 'alpha': 0.4320084409058443, 'critic_loss': 596.2094258983233, 'actor_loss': 97.27348026476409, 'time_step': 0.025864612289339478, 'td_error': 65.62026945465435, 'init_value': -144.89248657226562, 'ave_value': -86.10128383615196} step=11970
2022-04-20 15:47.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.01 [info     ] CQL_20220420154238: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003561143986662926, 'time_algorithm_update': 0.02474255241148653, 'temp_loss': -0.11439377909297482, 'temp': 0.8122888650113379, 'alpha_loss': 5.777205299215707, 'alpha': 0.4186264836823034, 'critic_loss': 597.5862448173657, 'actor_loss': 97.48318224901344, 'time_step': 0.025191024032949705, 'td_error': 65.71157347740491, 'init_value': -145.77880859375, 'ave_value': -86.9397464749801} step=12312
2022-04-20 15:48.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.10 [info     ] CQL_20220420154238: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003439243773967899, 'time_algorithm_update': 0.024504520739728246, 'temp_loss': -0.09774831509250298, 'temp': 0.815341138526013, 'alpha_loss': 5.57428737202583, 'alpha': 0.40579861132373585, 'critic_loss': 601.0734818664907, 'actor_loss': 97.87279258415712, 'time_step': 0.024938944487543833, 'td_error': 61.84417295866398, 'init_value': -144.6110382080078, 'ave_value': -85.95211413423019} step=12654
2022-04-20 15:48.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.19 [info     ] CQL_20220420154238: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003670614365248652, 'time_algorithm_update': 0.025303466975340368, 'temp_loss': -0.04011322738744362, 'temp': 0.8175338098877355, 'alpha_loss': 5.323593821441918, 'alpha': 0.39351208734582044, 'critic_loss': 604.128269753261, 'actor_loss': 98.21690901817634, 'time_step': 0.025767347966021266, 'td_error': 64.83457995643451, 'init_value': -148.52017211914062, 'ave_value': -87.89610547813982} step=12996
2022-04-20 15:48.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.28 [info     ] CQL_20220420154238: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003697112289785642, 'time_algorithm_update': 0.02575862825962535, 'temp_loss': -0.04969393116100664, 'temp': 0.8184941004939944, 'alpha_loss': 5.157850297570926, 'alpha': 0.3817065528958862, 'critic_loss': 607.6917384632847, 'actor_loss': 98.64913239953114, 'time_step': 0.026225322171261434, 'td_error': 63.91459578002841, 'init_value': -148.53335571289062, 'ave_value': -87.62972646586329} step=13338
2022-04-20 15:48.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.37 [info     ] CQL_20220420154238: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003667156598721331, 'time_algorithm_update': 0.025333594857600696, 'temp_loss': -0.03892759412897435, 'temp': 0.820399922934192, 'alpha_loss': 4.959742378770259, 'alpha': 0.37020152214674923, 'critic_loss': 608.4414344475283, 'actor_loss': 98.81791046767206, 'time_step': 0.02579514534152739, 'td_error': 68.34408257753974, 'init_value': -151.0658721923828, 'ave_value': -88.59280710936734} step=13680
2022-04-20 15:48.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.47 [info     ] CQL_20220420154238: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.000363307389599538, 'time_algorithm_update': 0.024946580853378565, 'temp_loss': -0.030043665237986204, 'temp': 0.8217635975595106, 'alpha_loss': 4.788060946771276, 'alpha': 0.35909686956489295, 'critic_loss': 608.4022033869871, 'actor_loss': 98.96158454850404, 'time_step': 0.025405009587605793, 'td_error': 65.87669185321025, 'init_value': -149.0559539794922, 'ave_value': -88.62222652636804} step=14022
2022-04-20 15:48.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:48.55 [info     ] CQL_20220420154238: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003659272054482622, 'time_algorithm_update': 0.02407372485824496, 'temp_loss': -0.012722330603596062, 'temp': 0.8224418001216754, 'alpha_loss': 4.59047924426564, 'alpha': 0.34843254516347805, 'critic_loss': 608.9189813625046, 'actor_loss': 99.04771878426536, 'time_step': 0.024537129708897997, 'td_error': 63.49213516988018, 'init_value': -147.5091552734375, 'ave_value': -87.82351139304606} step=14364
2022-04-20 15:48.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.04 [info     ] CQL_20220420154238: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003644883284094738, 'time_algorithm_update': 0.024078208103514555, 'temp_loss': 0.01751365586679581, 'temp': 0.8222041926537341, 'alpha_loss': 4.477665434803879, 'alpha': 0.33808624195425135, 'critic_loss': 609.0743096781056, 'actor_loss': 99.21248113621049, 'time_step': 0.024537840781853212, 'td_error': 59.684626625402565, 'init_value': -147.0665740966797, 'ave_value': -87.72343310375337} step=14706
2022-04-20 15:49.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.13 [info     ] CQL_20220420154238: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003696143278601574, 'time_algorithm_update': 0.023994739292657864, 'temp_loss': 0.011012711121063484, 'temp': 0.8216742508941226, 'alpha_loss': 4.307209451296176, 'alpha': 0.3279696548717064, 'critic_loss': 607.7040562322962, 'actor_loss': 99.19401447675382, 'time_step': 0.024460028486642225, 'td_error': 65.85706777757929, 'init_value': -151.8430633544922, 'ave_value': -89.48874629755446} step=15048
2022-04-20 15:49.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.22 [info     ] CQL_20220420154238: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003678659249467459, 'time_algorithm_update': 0.023899720426191363, 'temp_loss': 0.028249251274991106, 'temp': 0.8211061771501574, 'alpha_loss': 4.123129698965284, 'alpha': 0.31818287466701706, 'critic_loss': 608.936445358901, 'actor_loss': 99.46106260422377, 'time_step': 0.024361419398882235, 'td_error': 63.829822197917444, 'init_value': -150.06776428222656, 'ave_value': -88.66622780888602} step=15390
2022-04-20 15:49.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.30 [info     ] CQL_20220420154238: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003643314740811175, 'time_algorithm_update': 0.0240492458231965, 'temp_loss': 0.031729976412409926, 'temp': 0.8202254672845205, 'alpha_loss': 4.001145903130023, 'alpha': 0.3087969360470075, 'critic_loss': 606.086576852185, 'actor_loss': 99.26821227938112, 'time_step': 0.024509182450366995, 'td_error': 61.12011631395194, 'init_value': -146.6929168701172, 'ave_value': -87.58292615750788} step=15732
2022-04-20 15:49.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.39 [info     ] CQL_20220420154238: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003716520398680927, 'time_algorithm_update': 0.02362810310564543, 'temp_loss': 0.029675368900405386, 'temp': 0.8193005534291965, 'alpha_loss': 3.835736095556739, 'alpha': 0.29957837014518984, 'critic_loss': 602.8816249914337, 'actor_loss': 99.27407665141145, 'time_step': 0.02409772357048347, 'td_error': 58.518761526995995, 'init_value': -143.2991943359375, 'ave_value': -87.55046788804401} step=16074
2022-04-20 15:49.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.48 [info     ] CQL_20220420154238: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003617890397010491, 'time_algorithm_update': 0.024169383690371152, 'temp_loss': 0.06457864201752937, 'temp': 0.8175881227903199, 'alpha_loss': 3.647832499610053, 'alpha': 0.290800044672531, 'critic_loss': 601.4917803312603, 'actor_loss': 99.2100741960849, 'time_step': 0.0246284816697327, 'td_error': 60.35613369957922, 'init_value': -148.63233947753906, 'ave_value': -88.72017911343626} step=16416
2022-04-20 15:49.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:49.57 [info     ] CQL_20220420154238: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00037165970830192345, 'time_algorithm_update': 0.0241964748728345, 'temp_loss': 0.07449897124884072, 'temp': 0.8145885591269934, 'alpha_loss': 3.5548549744120814, 'alpha': 0.28226588152305426, 'critic_loss': 598.375111183925, 'actor_loss': 99.08106459232799, 'time_step': 0.0246657335270218, 'td_error': 60.98806815155663, 'init_value': -148.0045623779297, 'ave_value': -88.2861598937284} step=16758
2022-04-20 15:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:50.05 [info     ] CQL_20220420154238: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003674364926522238, 'time_algorithm_update': 0.02389246678491782, 'temp_loss': 0.1438080133674176, 'temp': 0.811600832388415, 'alpha_loss': 3.249569892883301, 'alpha': 0.2740552850634034, 'critic_loss': 598.6665127402857, 'actor_loss': 99.26663504706488, 'time_step': 0.02435680021319473, 'td_error': 58.192903009530866, 'init_value': -148.5013427734375, 'ave_value': -89.27896885240938} step=17100
2022-04-20 15:50.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420154238/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191004

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:50.06 [info     ] FQE_20220420155006: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015291104833763767, 'time_algorithm_update': 0.0035453805004257754, 'loss': 0.008909579252835408, 'time_step': 0.0037740626967096903, 'init_value': -0.26613277196884155, 'ave_value': -0.20373924239496657, 'soft_opc': nan} step=166




2022-04-20 15:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.07 [info     ] FQE_20220420155006: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015331032764480775, 'time_algorithm_update': 0.003489586244146508, 'loss': 0.006087622275654541, 'time_step': 0.0037096121225012354, 'init_value': -0.3801109790802002, 'ave_value': -0.2535347253495307, 'soft_opc': nan} step=332




2022-04-20 15:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.08 [info     ] FQE_20220420155006: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001517275729811335, 'time_algorithm_update': 0.0035200894597065017, 'loss': 0.0057098018566527995, 'time_step': 0.0037426733108888188, 'init_value': -0.43203920125961304, 'ave_value': -0.2837012877410932, 'soft_opc': nan} step=498




2022-04-20 15:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.08 [info     ] FQE_20220420155006: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015446651412780025, 'time_algorithm_update': 0.0033597256763871894, 'loss': 0.005687261749260935, 'time_step': 0.003583954041262707, 'init_value': -0.4764871299266815, 'ave_value': -0.30018714192734514, 'soft_opc': nan} step=664




2022-04-20 15:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.09 [info     ] FQE_20220420155006: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015229058552937335, 'time_algorithm_update': 0.0035210158451493964, 'loss': 0.005377728177840452, 'time_step': 0.0037397304213190653, 'init_value': -0.5415724515914917, 'ave_value': -0.3126636356669101, 'soft_opc': nan} step=830




2022-04-20 15:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.10 [info     ] FQE_20220420155006: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.000152003334229251, 'time_algorithm_update': 0.003415359071938388, 'loss': 0.00500331870813192, 'time_step': 0.003635739705648767, 'init_value': -0.592496395111084, 'ave_value': -0.3479863874790435, 'soft_opc': nan} step=996




2022-04-20 15:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.10 [info     ] FQE_20220420155006: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015541157090520285, 'time_algorithm_update': 0.0035922182611672275, 'loss': 0.004835727783088314, 'time_step': 0.00381850047283862, 'init_value': -0.6956788897514343, 'ave_value': -0.40443293623424864, 'soft_opc': nan} step=1162




2022-04-20 15:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.11 [info     ] FQE_20220420155006: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015249309769595963, 'time_algorithm_update': 0.003316770116966891, 'loss': 0.004609304506202644, 'time_step': 0.003537881805236081, 'init_value': -0.7801369428634644, 'ave_value': -0.4502669557836746, 'soft_opc': nan} step=1328




2022-04-20 15:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.12 [info     ] FQE_20220420155006: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015637099024761155, 'time_algorithm_update': 0.0035828711038612456, 'loss': 0.004330025339241038, 'time_step': 0.0038109041122068843, 'init_value': -0.8005213141441345, 'ave_value': -0.4471062877156713, 'soft_opc': nan} step=1494




2022-04-20 15:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.12 [info     ] FQE_20220420155006: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015305323773119823, 'time_algorithm_update': 0.0034407449055867023, 'loss': 0.004411280619440578, 'time_step': 0.0036675585321633212, 'init_value': -0.9232006072998047, 'ave_value': -0.512533571264027, 'soft_opc': nan} step=1660




2022-04-20 15:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.13 [info     ] FQE_20220420155006: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015434299606874766, 'time_algorithm_update': 0.0035851317715932087, 'loss': 0.004325289812208299, 'time_step': 0.003810605370854757, 'init_value': -1.0444086790084839, 'ave_value': -0.582333865995007, 'soft_opc': nan} step=1826




2022-04-20 15:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.14 [info     ] FQE_20220420155006: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001528535980776132, 'time_algorithm_update': 0.003331592284053205, 'loss': 0.004259904262368533, 'time_step': 0.0035554687660860726, 'init_value': -1.1142133474349976, 'ave_value': -0.6173791295706151, 'soft_opc': nan} step=1992




2022-04-20 15:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.14 [info     ] FQE_20220420155006: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015908551503376788, 'time_algorithm_update': 0.003484952880675534, 'loss': 0.004442252548332943, 'time_step': 0.0037146031138408616, 'init_value': -1.2220574617385864, 'ave_value': -0.670924024085875, 'soft_opc': nan} step=2158




2022-04-20 15:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.15 [info     ] FQE_20220420155006: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015199615294674793, 'time_algorithm_update': 0.0034225662070584586, 'loss': 0.004489989855876529, 'time_step': 0.003647735319941877, 'init_value': -1.2981680631637573, 'ave_value': -0.689913250469007, 'soft_opc': nan} step=2324




2022-04-20 15:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.16 [info     ] FQE_20220420155006: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015258358185549816, 'time_algorithm_update': 0.0035170316696166992, 'loss': 0.004651335833863501, 'time_step': 0.003738910318857216, 'init_value': -1.4043550491333008, 'ave_value': -0.7512355368869664, 'soft_opc': nan} step=2490




2022-04-20 15:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.16 [info     ] FQE_20220420155006: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015784171690423805, 'time_algorithm_update': 0.0034174531339162804, 'loss': 0.0048811616991781236, 'time_step': 0.003647122038416116, 'init_value': -1.5130656957626343, 'ave_value': -0.8004210750094136, 'soft_opc': nan} step=2656




2022-04-20 15:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.17 [info     ] FQE_20220420155006: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001572413616869823, 'time_algorithm_update': 0.003592208207371723, 'loss': 0.004982589182292436, 'time_step': 0.0038193162665309676, 'init_value': -1.6028556823730469, 'ave_value': -0.8530329738684871, 'soft_opc': nan} step=2822




2022-04-20 15:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.18 [info     ] FQE_20220420155006: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015223744403885072, 'time_algorithm_update': 0.0034009577280067534, 'loss': 0.00523557221862005, 'time_step': 0.003621099943138031, 'init_value': -1.6716296672821045, 'ave_value': -0.8890466241249898, 'soft_opc': nan} step=2988




2022-04-20 15:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.19 [info     ] FQE_20220420155006: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001625856721257589, 'time_algorithm_update': 0.0035625811082771026, 'loss': 0.0056591890804912805, 'time_step': 0.0037965946886912884, 'init_value': -1.7481987476348877, 'ave_value': -0.9090811792987625, 'soft_opc': nan} step=3154




2022-04-20 15:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.19 [info     ] FQE_20220420155006: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015905391739075444, 'time_algorithm_update': 0.0034950727439788452, 'loss': 0.006011669890091093, 'time_step': 0.0037285261843577923, 'init_value': -1.8464257717132568, 'ave_value': -0.9656685987202165, 'soft_opc': nan} step=3320




2022-04-20 15:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.20 [info     ] FQE_20220420155006: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015667260411274, 'time_algorithm_update': 0.0034885406494140625, 'loss': 0.006214409900046548, 'time_step': 0.0037197980536035746, 'init_value': -1.981508493423462, 'ave_value': -1.049760293752492, 'soft_opc': nan} step=3486




2022-04-20 15:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.21 [info     ] FQE_20220420155006: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001568162297628012, 'time_algorithm_update': 0.0033985893410372445, 'loss': 0.006897492773959929, 'time_step': 0.0036276377827288158, 'init_value': -2.036569118499756, 'ave_value': -1.0555982653269762, 'soft_opc': nan} step=3652




2022-04-20 15:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.21 [info     ] FQE_20220420155006: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015095917575330618, 'time_algorithm_update': 0.003554232149239046, 'loss': 0.0070398036798697905, 'time_step': 0.0037758594535919556, 'init_value': -2.1519436836242676, 'ave_value': -1.119346591212728, 'soft_opc': nan} step=3818




2022-04-20 15:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.22 [info     ] FQE_20220420155006: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015573041984833866, 'time_algorithm_update': 0.003366486135735569, 'loss': 0.007572788853164628, 'time_step': 0.0035936401551028333, 'init_value': -2.199018716812134, 'ave_value': -1.1302099223726907, 'soft_opc': nan} step=3984




2022-04-20 15:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.23 [info     ] FQE_20220420155006: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015466902629438653, 'time_algorithm_update': 0.003558166055794222, 'loss': 0.007738836922441575, 'time_step': 0.0037855082247630657, 'init_value': -2.3408498764038086, 'ave_value': -1.2004284768642204, 'soft_opc': nan} step=4150




2022-04-20 15:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.23 [info     ] FQE_20220420155006: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001570101243903838, 'time_algorithm_update': 0.003443572894636407, 'loss': 0.00889807778090822, 'time_step': 0.0036715254726180113, 'init_value': -2.4728565216064453, 'ave_value': -1.2719721168481015, 'soft_opc': nan} step=4316




2022-04-20 15:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.24 [info     ] FQE_20220420155006: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015771245381918298, 'time_algorithm_update': 0.0034849859145750484, 'loss': 0.009572091905809715, 'time_step': 0.003713146749749241, 'init_value': -2.523641347885132, 'ave_value': -1.2793322358934862, 'soft_opc': nan} step=4482




2022-04-20 15:50.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.25 [info     ] FQE_20220420155006: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015880975378565042, 'time_algorithm_update': 0.0034630399152456998, 'loss': 0.00998170416401026, 'time_step': 0.003700382738228304, 'init_value': -2.655817985534668, 'ave_value': -1.3578335177086227, 'soft_opc': nan} step=4648




2022-04-20 15:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.25 [info     ] FQE_20220420155006: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015758606324712914, 'time_algorithm_update': 0.003564629210046975, 'loss': 0.010903878410273871, 'time_step': 0.0037943182221378186, 'init_value': -2.778130054473877, 'ave_value': -1.4290122127982678, 'soft_opc': nan} step=4814




2022-04-20 15:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.26 [info     ] FQE_20220420155006: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015754872057811324, 'time_algorithm_update': 0.003575155533939959, 'loss': 0.011364162541892245, 'time_step': 0.0038074800767094255, 'init_value': -2.849891185760498, 'ave_value': -1.4521963306442573, 'soft_opc': nan} step=4980




2022-04-20 15:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.27 [info     ] FQE_20220420155006: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015458715967385164, 'time_algorithm_update': 0.00349271297454834, 'loss': 0.012448607850679567, 'time_step': 0.0037215330514563136, 'init_value': -2.9321885108947754, 'ave_value': -1.4928301828502266, 'soft_opc': nan} step=5146




2022-04-20 15:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.27 [info     ] FQE_20220420155006: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001567300543727645, 'time_algorithm_update': 0.0035053304879062146, 'loss': 0.012730388721193087, 'time_step': 0.0037328852228371494, 'init_value': -3.0358777046203613, 'ave_value': -1.5601882279061854, 'soft_opc': nan} step=5312




2022-04-20 15:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.28 [info     ] FQE_20220420155006: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001542525119092091, 'time_algorithm_update': 0.0034935187144451833, 'loss': 0.013192454284247774, 'time_step': 0.003719186208334314, 'init_value': -3.0204780101776123, 'ave_value': -1.4986676898541618, 'soft_opc': nan} step=5478




2022-04-20 15:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.29 [info     ] FQE_20220420155006: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001561598605420216, 'time_algorithm_update': 0.0035302667732698373, 'loss': 0.014255789471145001, 'time_step': 0.0037577267152717315, 'init_value': -3.2203149795532227, 'ave_value': -1.6421096946481983, 'soft_opc': nan} step=5644




2022-04-20 15:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.30 [info     ] FQE_20220420155006: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015954224460096243, 'time_algorithm_update': 0.003508732979556164, 'loss': 0.015068168336026236, 'time_step': 0.003743931471583355, 'init_value': -3.343716859817505, 'ave_value': -1.7363034076888013, 'soft_opc': nan} step=5810




2022-04-20 15:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.30 [info     ] FQE_20220420155006: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001601081296622035, 'time_algorithm_update': 0.003613754927393902, 'loss': 0.015130817481311015, 'time_step': 0.003844130470092038, 'init_value': -3.3690295219421387, 'ave_value': -1.697139008667085, 'soft_opc': nan} step=5976




2022-04-20 15:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.31 [info     ] FQE_20220420155006: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.000158419092017484, 'time_algorithm_update': 0.0034750886710293322, 'loss': 0.01724649916882683, 'time_step': 0.0037053895283894367, 'init_value': -3.3519208431243896, 'ave_value': -1.6876851327264228, 'soft_opc': nan} step=6142




2022-04-20 15:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.32 [info     ] FQE_20220420155006: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016076737139598433, 'time_algorithm_update': 0.003602297909288521, 'loss': 0.017134167121425944, 'time_step': 0.0038361707365656473, 'init_value': -3.4253320693969727, 'ave_value': -1.723398775108964, 'soft_opc': nan} step=6308




2022-04-20 15:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.32 [info     ] FQE_20220420155006: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015647727322865682, 'time_algorithm_update': 0.0034085986125900083, 'loss': 0.017764870936460567, 'time_step': 0.0036330409796841174, 'init_value': -3.51993465423584, 'ave_value': -1.7632737597534518, 'soft_opc': nan} step=6474




2022-04-20 15:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.33 [info     ] FQE_20220420155006: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001547365303499153, 'time_algorithm_update': 0.003519282283553158, 'loss': 0.018324868252826292, 'time_step': 0.003746006862226739, 'init_value': -3.596388816833496, 'ave_value': -1.8051413421024074, 'soft_opc': nan} step=6640




2022-04-20 15:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.34 [info     ] FQE_20220420155006: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015310781547822147, 'time_algorithm_update': 0.003339096724268902, 'loss': 0.019148600240748555, 'time_step': 0.0035638407052281393, 'init_value': -3.591683864593506, 'ave_value': -1.7610463976339847, 'soft_opc': nan} step=6806




2022-04-20 15:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.34 [info     ] FQE_20220420155006: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001556643520493105, 'time_algorithm_update': 0.003569512482149055, 'loss': 0.020174573683348226, 'time_step': 0.0037922744291374482, 'init_value': -3.6823296546936035, 'ave_value': -1.8161522307584212, 'soft_opc': nan} step=6972




2022-04-20 15:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.35 [info     ] FQE_20220420155006: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001549419150295028, 'time_algorithm_update': 0.003330652972301805, 'loss': 0.01940320227407368, 'time_step': 0.0035557617624121978, 'init_value': -3.7005512714385986, 'ave_value': -1.8178038852113123, 'soft_opc': nan} step=7138




2022-04-20 15:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.36 [info     ] FQE_20220420155006: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015592431447592126, 'time_algorithm_update': 0.0034749910055872904, 'loss': 0.02037905209981107, 'time_step': 0.003700799252613481, 'init_value': -3.7930498123168945, 'ave_value': -1.9083112988872706, 'soft_opc': nan} step=7304




2022-04-20 15:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.36 [info     ] FQE_20220420155006: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001553828457751906, 'time_algorithm_update': 0.0034739095044423298, 'loss': 0.02023713440839089, 'time_step': 0.003699726369007524, 'init_value': -3.787959098815918, 'ave_value': -1.8945861146139267, 'soft_opc': nan} step=7470




2022-04-20 15:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.37 [info     ] FQE_20220420155006: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015411175877214913, 'time_algorithm_update': 0.0035109879022621245, 'loss': 0.021725646647938853, 'time_step': 0.0037374051220445746, 'init_value': -3.9006099700927734, 'ave_value': -1.9519117227436655, 'soft_opc': nan} step=7636




2022-04-20 15:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.38 [info     ] FQE_20220420155006: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015316813825124717, 'time_algorithm_update': 0.0034284146435289496, 'loss': 0.02248651147693822, 'time_step': 0.003650721297206649, 'init_value': -3.9611639976501465, 'ave_value': -1.9942022077554653, 'soft_opc': nan} step=7802




2022-04-20 15:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.38 [info     ] FQE_20220420155006: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015593724078442677, 'time_algorithm_update': 0.003515904208263719, 'loss': 0.02366139425710966, 'time_step': 0.0037433052637490883, 'init_value': -4.013954162597656, 'ave_value': -2.025504771222335, 'soft_opc': nan} step=7968




2022-04-20 15:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.39 [info     ] FQE_20220420155006: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015555806906826524, 'time_algorithm_update': 0.003422777336764048, 'loss': 0.023998609462090915, 'time_step': 0.0036466638725924203, 'init_value': -4.131939888000488, 'ave_value': -2.1124921095062485, 'soft_opc': nan} step=8134




2022-04-20 15:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:50.40 [info     ] FQE_20220420155006: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001555149813732469, 'time_algorithm_update': 0.003504201590296734, 'loss': 0.024486883049718588, 'time_step': 0.003730949149074325, 'init_value': -4.167132377624512, 'ave_value': -2.13779607775898, 'soft_opc': nan} step=8300




2022-04-20 15:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155006/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 15:50.40 [info     ] Directory is created at d3rlpy_logs/FQE_20220420155040
2022-04-20 15:50.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:50.40 [debug    ] Building models...
2022-04-20 15:50.40 [debug    ] Models have been built.
2022-04-20 15:50.40 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420155040/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:50.42 [info     ] FQE_20220420155040: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001601826312930085, 'time_algorithm_update': 0.003592755212340244, 'loss': 0.025039499046194347, 'time_step': 0.0038245375766310583, 'init_value': -1.1137810945510864, 'ave_value': -1.1119234929213653, 'soft_opc': nan} step=344




2022-04-20 15:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.43 [info     ] FQE_20220420155040: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015832657037779342, 'time_algorithm_update': 0.003477638544038285, 'loss': 0.02306547449349404, 'time_step': 0.003706273644469505, 'init_value': -1.9017218351364136, 'ave_value': -1.879210159048304, 'soft_opc': nan} step=688




2022-04-20 15:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.44 [info     ] FQE_20220420155040: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016473268353661826, 'time_algorithm_update': 0.0034923560397569524, 'loss': 0.025184882238918786, 'time_step': 0.0037319923556128213, 'init_value': -2.8582749366760254, 'ave_value': -2.820148993196251, 'soft_opc': nan} step=1032




2022-04-20 15:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.46 [info     ] FQE_20220420155040: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016187928443731262, 'time_algorithm_update': 0.003440395344135373, 'loss': 0.027406590175814927, 'time_step': 0.0036723149377246235, 'init_value': -3.569769859313965, 'ave_value': -3.527686042345322, 'soft_opc': nan} step=1376




2022-04-20 15:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.47 [info     ] FQE_20220420155040: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015804726024006688, 'time_algorithm_update': 0.0035311392573423163, 'loss': 0.034531697224782304, 'time_step': 0.0037634525188179903, 'init_value': -4.48218297958374, 'ave_value': -4.455279464254508, 'soft_opc': nan} step=1720




2022-04-20 15:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.49 [info     ] FQE_20220420155040: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001648532789806987, 'time_algorithm_update': 0.0034683215063671734, 'loss': 0.040879116315113075, 'time_step': 0.003705894531205643, 'init_value': -4.999579429626465, 'ave_value': -4.932492141828344, 'soft_opc': nan} step=2064




2022-04-20 15:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.50 [info     ] FQE_20220420155040: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.000161225019499313, 'time_algorithm_update': 0.0034340730933255926, 'loss': 0.04955657450536396, 'time_step': 0.003670530263767686, 'init_value': -5.797905921936035, 'ave_value': -5.745897919261778, 'soft_opc': nan} step=2408




2022-04-20 15:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.51 [info     ] FQE_20220420155040: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001633395982343097, 'time_algorithm_update': 0.0034226948438688767, 'loss': 0.059933544883807734, 'time_step': 0.0036584138870239258, 'init_value': -6.184256553649902, 'ave_value': -6.130562978862105, 'soft_opc': nan} step=2752




2022-04-20 15:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.53 [info     ] FQE_20220420155040: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016002668890842173, 'time_algorithm_update': 0.003619062346081401, 'loss': 0.06912165372036831, 'time_step': 0.0038516888784807784, 'init_value': -6.736062049865723, 'ave_value': -6.731165421613165, 'soft_opc': nan} step=3096




2022-04-20 15:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.54 [info     ] FQE_20220420155040: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016376514767491542, 'time_algorithm_update': 0.0044482516687969825, 'loss': 0.08631487492320322, 'time_step': 0.004685677761255309, 'init_value': -7.30891227722168, 'ave_value': -7.4573106905327995, 'soft_opc': nan} step=3440




2022-04-20 15:50.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.56 [info     ] FQE_20220420155040: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016231592311415562, 'time_algorithm_update': 0.004277249408322711, 'loss': 0.10129031289076476, 'time_step': 0.004513190929279771, 'init_value': -7.745396614074707, 'ave_value': -8.027870401703156, 'soft_opc': nan} step=3784




2022-04-20 15:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.58 [info     ] FQE_20220420155040: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001643722833589066, 'time_algorithm_update': 0.0040443802988806435, 'loss': 0.11818973345649537, 'time_step': 0.004282920859580816, 'init_value': -8.015392303466797, 'ave_value': -8.4883426617788, 'soft_opc': nan} step=4128




2022-04-20 15:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:50.59 [info     ] FQE_20220420155040: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016431198563686636, 'time_algorithm_update': 0.004489249961320744, 'loss': 0.13183205354103256, 'time_step': 0.00472622555355693, 'init_value': -8.342391967773438, 'ave_value': -8.990159256681842, 'soft_opc': nan} step=4472




2022-04-20 15:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.01 [info     ] FQE_20220420155040: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016233325004577637, 'time_algorithm_update': 0.004068198592163796, 'loss': 0.15398774441158355, 'time_step': 0.004303627929022146, 'init_value': -8.669576644897461, 'ave_value': -9.675536058160795, 'soft_opc': nan} step=4816




2022-04-20 15:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.03 [info     ] FQE_20220420155040: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016000866889953613, 'time_algorithm_update': 0.004471868969673334, 'loss': 0.1742468833338556, 'time_step': 0.004701968542365141, 'init_value': -8.901158332824707, 'ave_value': -10.025852111609325, 'soft_opc': nan} step=5160




2022-04-20 15:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.04 [info     ] FQE_20220420155040: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001632883105167123, 'time_algorithm_update': 0.004072133191796236, 'loss': 0.18996643166187718, 'time_step': 0.004310198301492736, 'init_value': -9.56249713897705, 'ave_value': -10.833667271005762, 'soft_opc': nan} step=5504




2022-04-20 15:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.06 [info     ] FQE_20220420155040: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016417406326116516, 'time_algorithm_update': 0.004407276940900226, 'loss': 0.21552758117552934, 'time_step': 0.004643427771191264, 'init_value': -9.603206634521484, 'ave_value': -11.069332607962645, 'soft_opc': nan} step=5848




2022-04-20 15:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.08 [info     ] FQE_20220420155040: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016450881958007812, 'time_algorithm_update': 0.004298083310903505, 'loss': 0.2361081226317342, 'time_step': 0.0045372185318969015, 'init_value': -9.93903923034668, 'ave_value': -11.523749253641697, 'soft_opc': nan} step=6192




2022-04-20 15:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.09 [info     ] FQE_20220420155040: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016216275303862815, 'time_algorithm_update': 0.0041677508243294645, 'loss': 0.25810458099058026, 'time_step': 0.004403993833896725, 'init_value': -10.38236141204834, 'ave_value': -12.201022570563463, 'soft_opc': nan} step=6536




2022-04-20 15:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.11 [info     ] FQE_20220420155040: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001675542010817417, 'time_algorithm_update': 0.00450438191724378, 'loss': 0.278873048641524, 'time_step': 0.0047478966934736385, 'init_value': -10.548734664916992, 'ave_value': -12.415644236571818, 'soft_opc': nan} step=6880




2022-04-20 15:51.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.13 [info     ] FQE_20220420155040: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016359672989956167, 'time_algorithm_update': 0.004045775463414746, 'loss': 0.3026088658222106, 'time_step': 0.004284708305846813, 'init_value': -10.92747688293457, 'ave_value': -12.698603443730086, 'soft_opc': nan} step=7224




2022-04-20 15:51.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.15 [info     ] FQE_20220420155040: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017075205958166787, 'time_algorithm_update': 0.004512459732765375, 'loss': 0.3228347347561963, 'time_step': 0.004761038824569347, 'init_value': -11.145231246948242, 'ave_value': -12.781765490785023, 'soft_opc': nan} step=7568




2022-04-20 15:51.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.16 [info     ] FQE_20220420155040: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016067679538283238, 'time_algorithm_update': 0.004084390263224757, 'loss': 0.3474183894347313, 'time_step': 0.004318540179452231, 'init_value': -11.807022094726562, 'ave_value': -13.30250626565083, 'soft_opc': nan} step=7912




2022-04-20 15:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.18 [info     ] FQE_20220420155040: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001653571461522302, 'time_algorithm_update': 0.004415867633597795, 'loss': 0.36912671767370125, 'time_step': 0.004654885724533436, 'init_value': -12.164843559265137, 'ave_value': -13.561221935484676, 'soft_opc': nan} step=8256




2022-04-20 15:51.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.20 [info     ] FQE_20220420155040: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015957826791807662, 'time_algorithm_update': 0.00430705796840579, 'loss': 0.398077649762854, 'time_step': 0.004540729661320531, 'init_value': -12.802289962768555, 'ave_value': -14.123520012361025, 'soft_opc': nan} step=8600




2022-04-20 15:51.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.21 [info     ] FQE_20220420155040: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016267147175101347, 'time_algorithm_update': 0.004039907871290695, 'loss': 0.4171316107453475, 'time_step': 0.004276426032532093, 'init_value': -12.959516525268555, 'ave_value': -14.249240553522114, 'soft_opc': nan} step=8944




2022-04-20 15:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.23 [info     ] FQE_20220420155040: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016308939734170603, 'time_algorithm_update': 0.004516701365626136, 'loss': 0.4311536700345662, 'time_step': 0.004752731600473094, 'init_value': -13.421089172363281, 'ave_value': -14.559948391607241, 'soft_opc': nan} step=9288




2022-04-20 15:51.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.25 [info     ] FQE_20220420155040: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001591201438460239, 'time_algorithm_update': 0.004076727600984795, 'loss': 0.4588926543080963, 'time_step': 0.0043117681214975756, 'init_value': -13.871186256408691, 'ave_value': -14.995271325071954, 'soft_opc': nan} step=9632




2022-04-20 15:51.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.26 [info     ] FQE_20220420155040: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016440485799035362, 'time_algorithm_update': 0.00453975519468618, 'loss': 0.48721668057057055, 'time_step': 0.00477787644364113, 'init_value': -14.45172119140625, 'ave_value': -15.48444941794439, 'soft_opc': nan} step=9976




2022-04-20 15:51.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.28 [info     ] FQE_20220420155040: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016335969747498978, 'time_algorithm_update': 0.004026272269182427, 'loss': 0.5182380465368286, 'time_step': 0.00426336221916731, 'init_value': -14.960320472717285, 'ave_value': -15.790828044536347, 'soft_opc': nan} step=10320




2022-04-20 15:51.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.30 [info     ] FQE_20220420155040: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016206641529881678, 'time_algorithm_update': 0.004344128592069759, 'loss': 0.534596464948642, 'time_step': 0.004579218321068342, 'init_value': -15.147275924682617, 'ave_value': -16.066875338019948, 'soft_opc': nan} step=10664




2022-04-20 15:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.31 [info     ] FQE_20220420155040: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016595943029536756, 'time_algorithm_update': 0.004341514997704085, 'loss': 0.566977184606976, 'time_step': 0.0045821770679119024, 'init_value': -15.322385787963867, 'ave_value': -16.134733700329388, 'soft_opc': nan} step=11008




2022-04-20 15:51.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.33 [info     ] FQE_20220420155040: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016158334044523017, 'time_algorithm_update': 0.004021694493848224, 'loss': 0.5804987319171169, 'time_step': 0.0042565756065900935, 'init_value': -15.522127151489258, 'ave_value': -16.42692412730298, 'soft_opc': nan} step=11352




2022-04-20 15:51.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.35 [info     ] FQE_20220420155040: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016378039537474166, 'time_algorithm_update': 0.0045235718405523965, 'loss': 0.5930484559083747, 'time_step': 0.004762490821439166, 'init_value': -15.46310806274414, 'ave_value': -16.462707846561553, 'soft_opc': nan} step=11696




2022-04-20 15:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.36 [info     ] FQE_20220420155040: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016261186710623808, 'time_algorithm_update': 0.004095523856406988, 'loss': 0.6192414436305245, 'time_step': 0.004332650539486907, 'init_value': -15.465656280517578, 'ave_value': -16.64731754923983, 'soft_opc': nan} step=12040




2022-04-20 15:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.38 [info     ] FQE_20220420155040: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017114988593168036, 'time_algorithm_update': 0.004508448894633804, 'loss': 0.6432738018622839, 'time_step': 0.004755132420118465, 'init_value': -16.139646530151367, 'ave_value': -17.267407432616885, 'soft_opc': nan} step=12384




2022-04-20 15:51.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.40 [info     ] FQE_20220420155040: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016427663869636003, 'time_algorithm_update': 0.0040718303170315055, 'loss': 0.6744522291213967, 'time_step': 0.004309562056563621, 'init_value': -16.549348831176758, 'ave_value': -17.715489158329664, 'soft_opc': nan} step=12728




2022-04-20 15:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.41 [info     ] FQE_20220420155040: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016475763431815214, 'time_algorithm_update': 0.004379207311674606, 'loss': 0.6993414034178957, 'time_step': 0.004615556362063386, 'init_value': -16.625011444091797, 'ave_value': -17.879060703663676, 'soft_opc': nan} step=13072




2022-04-20 15:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.43 [info     ] FQE_20220420155040: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001674994479778201, 'time_algorithm_update': 0.00446029735165973, 'loss': 0.7233581090681688, 'time_step': 0.004703544600065364, 'init_value': -16.926511764526367, 'ave_value': -18.170030952479927, 'soft_opc': nan} step=13416




2022-04-20 15:51.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.45 [info     ] FQE_20220420155040: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001662449781284776, 'time_algorithm_update': 0.004122340401937795, 'loss': 0.7549110983097709, 'time_step': 0.004362104937087658, 'init_value': -16.895612716674805, 'ave_value': -18.293542012003428, 'soft_opc': nan} step=13760




2022-04-20 15:51.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.46 [info     ] FQE_20220420155040: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001658797264099121, 'time_algorithm_update': 0.004540479460427928, 'loss': 0.7690373767752113, 'time_step': 0.004781602427016857, 'init_value': -16.83074378967285, 'ave_value': -18.638858743908035, 'soft_opc': nan} step=14104




2022-04-20 15:51.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.48 [info     ] FQE_20220420155040: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016459060269732807, 'time_algorithm_update': 0.004108550936676735, 'loss': 0.8047652562354627, 'time_step': 0.004348055567852286, 'init_value': -16.612377166748047, 'ave_value': -18.799631844920754, 'soft_opc': nan} step=14448




2022-04-20 15:51.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.50 [info     ] FQE_20220420155040: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016307692195093908, 'time_algorithm_update': 0.00452353510745736, 'loss': 0.81280099398045, 'time_step': 0.004760549512020377, 'init_value': -16.611125946044922, 'ave_value': -18.95132373205537, 'soft_opc': nan} step=14792




2022-04-20 15:51.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.51 [info     ] FQE_20220420155040: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016375752382500228, 'time_algorithm_update': 0.004153738881266395, 'loss': 0.8322774592602928, 'time_step': 0.004391275173009828, 'init_value': -16.86091423034668, 'ave_value': -19.63613472745986, 'soft_opc': nan} step=15136




2022-04-20 15:51.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.53 [info     ] FQE_20220420155040: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016160759814949922, 'time_algorithm_update': 0.004205206105875415, 'loss': 0.8733292579661708, 'time_step': 0.0044434250787247056, 'init_value': -16.94464683532715, 'ave_value': -19.77283091209619, 'soft_opc': nan} step=15480




2022-04-20 15:51.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.55 [info     ] FQE_20220420155040: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016801856284917786, 'time_algorithm_update': 0.004490783741307813, 'loss': 0.896426095470223, 'time_step': 0.004731159570605256, 'init_value': -17.50005531311035, 'ave_value': -20.697869409738225, 'soft_opc': nan} step=15824




2022-04-20 15:51.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.56 [info     ] FQE_20220420155040: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016208651453949685, 'time_algorithm_update': 0.004076045612956203, 'loss': 0.9133137187184116, 'time_step': 0.00430936522262041, 'init_value': -17.054214477539062, 'ave_value': -20.62022460140613, 'soft_opc': nan} step=16168




2022-04-20 15:51.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:51.58 [info     ] FQE_20220420155040: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.000166549239047738, 'time_algorithm_update': 0.004489285308261251, 'loss': 0.9271976626748869, 'time_step': 0.0047320092833319375, 'init_value': -16.787437438964844, 'ave_value': -20.846144126697133, 'soft_opc': nan} step=16512




2022-04-20 15:51.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:52.00 [info     ] FQE_20220420155040: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015948608864185422, 'time_algorithm_update': 0.004046711117722267, 'loss': 0.9410177324898541, 'time_step': 0.004279073587683744, 'init_value': -17.352798461914062, 'ave_value': -21.462613136557547, 'soft_opc': nan} step=16856




2022-04-20 15:52.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 15:52.02 [info     ] FQE_20220420155040: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.000166047451108001, 'time_algorithm_update': 0.004471958376640497, 'loss': 0.9674893495204404, 'time_step': 0.004713391148766806, 'init_value': -17.190921783447266, 'ave_value': -21.456933030589333, 'soft_opc': nan} step=17200




2022-04-20 15:52.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155040/model_17200.pt
most optimal hyper params for cql at this point:  [0.008096776650201774, 0.0028515353905198488, 3.4445390341283954e-05, 5]
search iteration:  3
using hyper params:  [0.006478216278601315, 0.006419595903539504, 6.329045586121242e-05, 3]
2022-04-20 15:52.02 [debug    ] RoundIterator is selected.
2022-04-20 15:52.02 [info     ] Directory is created at d3rlpy_logs/CQL_20220420155202
2022-04-20 15:52.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:52.02 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:52.02 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420155202/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_l

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.11 [info     ] CQL_20220420155202: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00033698862756204886, 'time_algorithm_update': 0.025976560269182887, 'temp_loss': 4.603432383453637, 'temp': 0.9894114730999484, 'alpha_loss': -13.360177596410116, 'alpha': 1.015844190329836, 'critic_loss': 25.05879012883058, 'actor_loss': 1.7128189408781322, 'time_step': 0.026410877356055188, 'td_error': 4.616067063463322, 'init_value': -6.798381805419922, 'ave_value': -3.4264036937574813} step=342
2022-04-20 15:52.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.20 [info     ] CQL_20220420155202: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00034808415418479874, 'time_algorithm_update': 0.025446875750669957, 'temp_loss': 3.647199031205205, 'temp': 0.969947308999056, 'alpha_loss': -5.005110136598175, 'alpha': 1.0374611703275938, 'critic_loss': 22.154469543033176, 'actor_loss': 5.235131797734757, 'time_step': 0.025892168457745113, 'td_error': 5.077146466968612, 'init_value': -10.584111213684082, 'ave_value': -5.163511632658743} step=684
2022-04-20 15:52.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.30 [info     ] CQL_20220420155202: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003409525107221994, 'time_algorithm_update': 0.02552725348556251, 'temp_loss': 2.8605031744081373, 'temp': 0.9534231985521595, 'alpha_loss': -0.5546473522795833, 'alpha': 1.0469940723034374, 'critic_loss': 43.29274553443953, 'actor_loss': 8.937011692259047, 'time_step': 0.025968871618572035, 'td_error': 6.830001482848471, 'init_value': -16.06545066833496, 'ave_value': -7.584899129616516} step=1026
2022-04-20 15:52.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.39 [info     ] CQL_20220420155202: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00034913542674996, 'time_algorithm_update': 0.02554310971533346, 'temp_loss': 2.3109361867458498, 'temp': 0.938600101143296, 'alpha_loss': 2.7970752900586144, 'alpha': 1.0408863777305648, 'critic_loss': 71.44543860809148, 'actor_loss': 13.083989268855044, 'time_step': 0.025992774823952836, 'td_error': 9.503141700452385, 'init_value': -23.135839462280273, 'ave_value': -11.392788795124035} step=1368
2022-04-20 15:52.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.48 [info     ] CQL_20220420155202: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00034334157642565275, 'time_algorithm_update': 0.02573686803293507, 'temp_loss': 1.8582624385231419, 'temp': 0.9252578738140084, 'alpha_loss': 5.279656936202133, 'alpha': 1.0169016269215367, 'critic_loss': 101.59171843947026, 'actor_loss': 17.065247452049924, 'time_step': 0.026178884227373446, 'td_error': 13.850136214662951, 'init_value': -30.49762535095215, 'ave_value': -14.87246897378349} step=1710
2022-04-20 15:52.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:52.58 [info     ] CQL_20220420155202: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003405202899062843, 'time_algorithm_update': 0.025272477439969604, 'temp_loss': 1.5147481494479709, 'temp': 0.9131091549034007, 'alpha_loss': 7.1546165831604895, 'alpha': 0.9806198990135863, 'critic_loss': 135.4830557393749, 'actor_loss': 21.25523494698151, 'time_step': 0.02571073679896126, 'td_error': 20.876029557591945, 'init_value': -37.315528869628906, 'ave_value': -18.775810887357135} step=2052
2022-04-20 15:52.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.07 [info     ] CQL_20220420155202: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00034377170585052314, 'time_algorithm_update': 0.02583346729390105, 'temp_loss': 1.1534209626982783, 'temp': 0.9021680762893275, 'alpha_loss': 8.809313361407721, 'alpha': 0.939730249301732, 'critic_loss': 172.8208565070615, 'actor_loss': 25.42631999791017, 'time_step': 0.026277625072769255, 'td_error': 21.40548599978522, 'init_value': -44.85784149169922, 'ave_value': -22.946524803627717} step=2394
2022-04-20 15:53.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.16 [info     ] CQL_20220420155202: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034554799397786457, 'time_algorithm_update': 0.025355749660068087, 'temp_loss': 0.8898277741078048, 'temp': 0.8929013285720557, 'alpha_loss': 10.369771868164776, 'alpha': 0.8984748928519021, 'critic_loss': 215.21915609794752, 'actor_loss': 29.788366618909333, 'time_step': 0.025799442452994006, 'td_error': 38.02908162411207, 'init_value': -53.35966110229492, 'ave_value': -26.85832205480955} step=2736
2022-04-20 15:53.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.26 [info     ] CQL_20220420155202: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00033869868830630655, 'time_algorithm_update': 0.025580239574811613, 'temp_loss': 0.6887261982222921, 'temp': 0.8845786474950132, 'alpha_loss': 11.478736153820105, 'alpha': 0.8595167919557695, 'critic_loss': 267.4395299543414, 'actor_loss': 34.56693386613277, 'time_step': 0.026018403426945558, 'td_error': 45.269202083819984, 'init_value': -58.493263244628906, 'ave_value': -29.604130071193516} step=3078
2022-04-20 15:53.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.35 [info     ] CQL_20220420155202: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00034383793323360687, 'time_algorithm_update': 0.025691353089628163, 'temp_loss': 0.4297351306903432, 'temp': 0.877759224315833, 'alpha_loss': 13.088145862545883, 'alpha': 0.8227419658013951, 'critic_loss': 326.1418257150037, 'actor_loss': 39.6316813976444, 'time_step': 0.026134677100599857, 'td_error': 53.793956962264865, 'init_value': -68.40309143066406, 'ave_value': -33.79783404431096} step=3420
2022-04-20 15:53.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.44 [info     ] CQL_20220420155202: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003324314864755374, 'time_algorithm_update': 0.02358916349578322, 'temp_loss': 0.27305383498818553, 'temp': 0.8728304271809539, 'alpha_loss': 14.137420113323724, 'alpha': 0.7881347556211795, 'critic_loss': 396.639296503792, 'actor_loss': 44.89871121010585, 'time_step': 0.02401559673554716, 'td_error': 114.07790276268639, 'init_value': -81.6840591430664, 'ave_value': -39.4591246493644} step=3762
2022-04-20 15:53.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:53.52 [info     ] CQL_20220420155202: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003280200456318102, 'time_algorithm_update': 0.023292862880996794, 'temp_loss': 0.12294828610178846, 'temp': 0.8695090965569368, 'alpha_loss': 15.895205943905122, 'alpha': 0.7555019752672542, 'critic_loss': 478.2144341719778, 'actor_loss': 50.868839888544805, 'time_step': 0.02371469296907124, 'td_error': 191.22087183914797, 'init_value': -98.80743408203125, 'ave_value': -46.9625012420279} step=4104
2022-04-20 15:53.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.01 [info     ] CQL_20220420155202: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003406053398087708, 'time_algorithm_update': 0.02412178223593193, 'temp_loss': 0.034017406360447755, 'temp': 0.868394002231241, 'alpha_loss': 15.848281969103898, 'alpha': 0.7240336483333543, 'critic_loss': 589.127161037155, 'actor_loss': 57.6770595528229, 'time_step': 0.02455827227809973, 'td_error': 119.64863063096547, 'init_value': -108.00276947021484, 'ave_value': -50.13162875200445} step=4446
2022-04-20 15:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.10 [info     ] CQL_20220420155202: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00034220246543661194, 'time_algorithm_update': 0.023951158188937, 'temp_loss': -0.005823673054096643, 'temp': 0.8678333930104797, 'alpha_loss': 15.013854065833733, 'alpha': 0.6984723830780788, 'critic_loss': 687.9774575038263, 'actor_loss': 63.2327753256636, 'time_step': 0.024390746278372424, 'td_error': 198.35919849360042, 'init_value': -122.26594543457031, 'ave_value': -57.48208803654925} step=4788
2022-04-20 15:54.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.18 [info     ] CQL_20220420155202: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003388562397650111, 'time_algorithm_update': 0.02394482894250524, 'temp_loss': -0.16157692589127182, 'temp': 0.870137525929345, 'alpha_loss': 15.650038273013823, 'alpha': 0.673172251348607, 'critic_loss': 790.8374592742027, 'actor_loss': 69.52536579600552, 'time_step': 0.02438155322047005, 'td_error': 173.11714584407892, 'init_value': -135.8458251953125, 'ave_value': -62.77627097918375} step=5130
2022-04-20 15:54.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.27 [info     ] CQL_20220420155202: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034681746834202816, 'time_algorithm_update': 0.02397725874917549, 'temp_loss': -0.23833210788100784, 'temp': 0.875941934118494, 'alpha_loss': 15.387988492062217, 'alpha': 0.6500142178340265, 'critic_loss': 880.4111636870089, 'actor_loss': 74.46501853452091, 'time_step': 0.024422506142778005, 'td_error': 306.1660198832777, 'init_value': -149.49386596679688, 'ave_value': -68.40074995816157} step=5472
2022-04-20 15:54.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.36 [info     ] CQL_20220420155202: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003380036493491011, 'time_algorithm_update': 0.023988921042771366, 'temp_loss': -0.33975043607472677, 'temp': 0.8851160922942803, 'alpha_loss': 15.697417917307357, 'alpha': 0.6274341912994608, 'critic_loss': 970.4307684647409, 'actor_loss': 79.73502227158575, 'time_step': 0.024427075832210786, 'td_error': 317.36230634596495, 'init_value': -164.84933471679688, 'ave_value': -73.9990376360889} step=5814
2022-04-20 15:54.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.45 [info     ] CQL_20220420155202: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00034183995765552186, 'time_algorithm_update': 0.024015662962930243, 'temp_loss': -0.38335217331802984, 'temp': 0.8981100280731045, 'alpha_loss': 16.38919469069319, 'alpha': 0.6057149323455074, 'critic_loss': 1065.501512315538, 'actor_loss': 85.20126715319896, 'time_step': 0.024457170949344745, 'td_error': 329.20222961020414, 'init_value': -180.28323364257812, 'ave_value': -77.73963066097181} step=6156
2022-04-20 15:54.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:54.53 [info     ] CQL_20220420155202: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003392933404933639, 'time_algorithm_update': 0.024229670825757478, 'temp_loss': -0.4227597130277352, 'temp': 0.9127576990782866, 'alpha_loss': 17.63638767443205, 'alpha': 0.5838214359436816, 'critic_loss': 1168.704692282872, 'actor_loss': 91.33077960544162, 'time_step': 0.02466889989306355, 'td_error': 450.50727994508577, 'init_value': -201.9973602294922, 'ave_value': -85.57849534477468} step=6498
2022-04-20 15:54.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.02 [info     ] CQL_20220420155202: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034321121305052997, 'time_algorithm_update': 0.024443492554781728, 'temp_loss': -0.3999745884463627, 'temp': 0.9280303530177177, 'alpha_loss': 15.852468271701657, 'alpha': 0.5637791979382609, 'critic_loss': 1271.7861933122601, 'actor_loss': 96.51305547792312, 'time_step': 0.024883022085267898, 'td_error': 603.4134969418932, 'init_value': -220.68490600585938, 'ave_value': -90.12265780766268} step=6840
2022-04-20 15:55.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.11 [info     ] CQL_20220420155202: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00034191524773313287, 'time_algorithm_update': 0.024107003769679378, 'temp_loss': -0.3963000588227957, 'temp': 0.9437091125730883, 'alpha_loss': 16.28277027118973, 'alpha': 0.5449818874660292, 'critic_loss': 1370.939484534905, 'actor_loss': 102.37914762441177, 'time_step': 0.02454653539155659, 'td_error': 675.7511917910567, 'init_value': -239.94345092773438, 'ave_value': -93.96629097982301} step=7182
2022-04-20 15:55.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.20 [info     ] CQL_20220420155202: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00034566790039776364, 'time_algorithm_update': 0.02426741764559383, 'temp_loss': -0.3884750668678367, 'temp': 0.9585490427170581, 'alpha_loss': 17.620757812645003, 'alpha': 0.526343785357057, 'critic_loss': 1483.7402732804505, 'actor_loss': 108.89316576684428, 'time_step': 0.024712616240071972, 'td_error': 743.9382077313605, 'init_value': -275.83056640625, 'ave_value': -101.58398219569712} step=7524
2022-04-20 15:55.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.29 [info     ] CQL_20220420155202: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00033571357615509923, 'time_algorithm_update': 0.024221947318629214, 'temp_loss': -0.336013654916825, 'temp': 0.9732574736862852, 'alpha_loss': 15.057750977967915, 'alpha': 0.5089763950186166, 'critic_loss': 1608.904276529948, 'actor_loss': 114.58891432466562, 'time_step': 0.024656886942902502, 'td_error': 893.7630703670504, 'init_value': -283.48101806640625, 'ave_value': -101.10699451938406} step=7866
2022-04-20 15:55.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.38 [info     ] CQL_20220420155202: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003401682390804179, 'time_algorithm_update': 0.024180799199823747, 'temp_loss': -0.2559852015379577, 'temp': 0.9854280589616786, 'alpha_loss': 12.179847696371246, 'alpha': 0.4952298292116812, 'critic_loss': 1688.7599705032437, 'actor_loss': 117.76760355631511, 'time_step': 0.024620942902146725, 'td_error': 602.9362847230344, 'init_value': -293.2423095703125, 'ave_value': -105.05014758339068} step=8208
2022-04-20 15:55.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.46 [info     ] CQL_20220420155202: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000338479789376956, 'time_algorithm_update': 0.024205539658752797, 'temp_loss': -0.27021115019438213, 'temp': 0.9967385770981771, 'alpha_loss': 14.835183828197724, 'alpha': 0.4809891736646842, 'critic_loss': 1755.6803078456232, 'actor_loss': 122.38244831910608, 'time_step': 0.02464315138365093, 'td_error': 893.4663575614509, 'init_value': -309.5614013671875, 'ave_value': -111.69239583966565} step=8550
2022-04-20 15:55.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:55.55 [info     ] CQL_20220420155202: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003433959525928163, 'time_algorithm_update': 0.024209747537534836, 'temp_loss': -0.247706853969796, 'temp': 1.008941224095417, 'alpha_loss': 15.937215816207797, 'alpha': 0.4642282480384871, 'critic_loss': 1868.9783917700338, 'actor_loss': 128.8837189256099, 'time_step': 0.02465521870997914, 'td_error': 1330.6430344745302, 'init_value': -327.2801818847656, 'ave_value': -116.20547985146845} step=8892
2022-04-20 15:55.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.04 [info     ] CQL_20220420155202: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00034228542394805375, 'time_algorithm_update': 0.02405074047066315, 'temp_loss': -0.16190181187849645, 'temp': 1.0178268987532946, 'alpha_loss': 15.341747482617697, 'alpha': 0.44909786332769003, 'critic_loss': 1979.1802225391766, 'actor_loss': 134.13255867762874, 'time_step': 0.02449371173367863, 'td_error': 1354.1292827328857, 'init_value': -358.5378112792969, 'ave_value': -128.54730465801316} step=9234
2022-04-20 15:56.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.13 [info     ] CQL_20220420155202: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034330114286545423, 'time_algorithm_update': 0.025676209327073124, 'temp_loss': -0.12915357357511917, 'temp': 1.0250415258240282, 'alpha_loss': 14.77922637141936, 'alpha': 0.4344839281157443, 'critic_loss': 2100.3540278206096, 'actor_loss': 140.25177465963085, 'time_step': 0.026120164938140334, 'td_error': 1875.1327212288388, 'init_value': -374.4715270996094, 'ave_value': -132.27751333714068} step=9576
2022-04-20 15:56.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.23 [info     ] CQL_20220420155202: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003471925244693868, 'time_algorithm_update': 0.025454085472731564, 'temp_loss': -0.17988204337351504, 'temp': 1.0330986729142262, 'alpha_loss': 15.622724199852748, 'alpha': 0.42035619165116583, 'critic_loss': 2216.4878700657896, 'actor_loss': 146.17553916172673, 'time_step': 0.02590013038345248, 'td_error': 2328.69033743892, 'init_value': -383.37603759765625, 'ave_value': -134.61855435747015} step=9918
2022-04-20 15:56.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.32 [info     ] CQL_20220420155202: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003466139062803391, 'time_algorithm_update': 0.02566951060155679, 'temp_loss': -0.09846483012497947, 'temp': 1.041172730992412, 'alpha_loss': 12.985233044066625, 'alpha': 0.4073477559096632, 'critic_loss': 2320.731480291712, 'actor_loss': 150.22084029794436, 'time_step': 0.026116040017869737, 'td_error': 1579.278614189549, 'init_value': -394.6904296875, 'ave_value': -141.53752460443087} step=10260
2022-04-20 15:56.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.41 [info     ] CQL_20220420155202: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034589377063059667, 'time_algorithm_update': 0.025383637662519488, 'temp_loss': -0.08208571650000693, 'temp': 1.0458429162962395, 'alpha_loss': 12.703752632029573, 'alpha': 0.39599485128943684, 'critic_loss': 2390.8121120162873, 'actor_loss': 153.42026530929476, 'time_step': 0.025830880243178696, 'td_error': 1473.0899667251958, 'init_value': -401.3492431640625, 'ave_value': -142.58719754740596} step=10602
2022-04-20 15:56.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:56.51 [info     ] CQL_20220420155202: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003428040889271519, 'time_algorithm_update': 0.02577408294231571, 'temp_loss': -0.026447762898577934, 'temp': 1.0474441260622258, 'alpha_loss': 10.30077764374471, 'alpha': 0.38543157549629437, 'critic_loss': 2441.8281339232685, 'actor_loss': 155.29450051826342, 'time_step': 0.026215776365402847, 'td_error': 542.6477825126193, 'init_value': -408.88153076171875, 'ave_value': -144.50979923664048} step=10944
2022-04-20 15:56.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.00 [info     ] CQL_20220420155202: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003391706455520719, 'time_algorithm_update': 0.02525686102303845, 'temp_loss': -0.040463128170728335, 'temp': 1.050260211292066, 'alpha_loss': 8.450750036546362, 'alpha': 0.3764189961883757, 'critic_loss': 2465.467099909197, 'actor_loss': 156.26651212904187, 'time_step': 0.02569303665941919, 'td_error': 579.356435970069, 'init_value': -411.9388732910156, 'ave_value': -150.17841398933314} step=11286
2022-04-20 15:57.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.09 [info     ] CQL_20220420155202: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00034339874111420925, 'time_algorithm_update': 0.02557181405742266, 'temp_loss': 0.0044847226739801165, 'temp': 1.0523821715025874, 'alpha_loss': 9.108042833400749, 'alpha': 0.36752115007032427, 'critic_loss': 2472.501732184873, 'actor_loss': 156.95566097058747, 'time_step': 0.026013009729441147, 'td_error': 961.1851499756924, 'init_value': -414.2622985839844, 'ave_value': -146.70459006742584} step=11628
2022-04-20 15:57.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.18 [info     ] CQL_20220420155202: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00033812216150830364, 'time_algorithm_update': 0.025323545723630672, 'temp_loss': 0.051714961765584544, 'temp': 1.0502271547652127, 'alpha_loss': 8.63564986513372, 'alpha': 0.3580360613894044, 'critic_loss': 2479.5023889374315, 'actor_loss': 157.36677131875913, 'time_step': 0.0257617367638482, 'td_error': 533.9808226996931, 'init_value': -412.3194885253906, 'ave_value': -146.2315359049928} step=11970
2022-04-20 15:57.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.28 [info     ] CQL_20220420155202: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00033876700708043505, 'time_algorithm_update': 0.025680133473803424, 'temp_loss': 0.07105203707054344, 'temp': 1.0463529494073656, 'alpha_loss': 7.281987980792397, 'alpha': 0.349263544929655, 'critic_loss': 2473.9134885553726, 'actor_loss': 157.10800195437426, 'time_step': 0.026117639234888624, 'td_error': 864.4688018376045, 'init_value': -404.8490295410156, 'ave_value': -147.53315429234826} step=12312
2022-04-20 15:57.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.37 [info     ] CQL_20220420155202: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003424987458346183, 'time_algorithm_update': 0.025368682125158477, 'temp_loss': -0.01098555584073241, 'temp': 1.044938812130376, 'alpha_loss': 7.050860236959847, 'alpha': 0.340808467296829, 'critic_loss': 2464.028636553134, 'actor_loss': 157.35915345755237, 'time_step': 0.02580707742456804, 'td_error': 673.2950598228985, 'init_value': -409.722412109375, 'ave_value': -147.64411853132617} step=12654
2022-04-20 15:57.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.46 [info     ] CQL_20220420155202: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00034173120532119485, 'time_algorithm_update': 0.02559098514199954, 'temp_loss': 0.06712647628758037, 'temp': 1.0425644123066238, 'alpha_loss': 6.499694428248712, 'alpha': 0.33248860027357846, 'critic_loss': 2450.467476471126, 'actor_loss': 156.68985469438877, 'time_step': 0.026031851768493652, 'td_error': 544.7301335047043, 'init_value': -412.37060546875, 'ave_value': -149.6004632122989} step=12996
2022-04-20 15:57.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:57.56 [info     ] CQL_20220420155202: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00033950875377097324, 'time_algorithm_update': 0.025424481135362772, 'temp_loss': 0.05672684785697544, 'temp': 1.0379206625341673, 'alpha_loss': 7.792715064266272, 'alpha': 0.32356279705002994, 'critic_loss': 2444.2920264528507, 'actor_loss': 157.59312169035974, 'time_step': 0.025863980689243962, 'td_error': 1178.998381262169, 'init_value': -397.6607971191406, 'ave_value': -145.65424419090084} step=13338
2022-04-20 15:57.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.05 [info     ] CQL_20220420155202: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.000342366988198799, 'time_algorithm_update': 0.0256915371320401, 'temp_loss': 0.023576173610026726, 'temp': 1.0359075351765281, 'alpha_loss': 10.579174794648823, 'alpha': 0.31209821118945963, 'critic_loss': 2464.057984112299, 'actor_loss': 159.8214677955672, 'time_step': 0.026133367889805845, 'td_error': 1499.8904095332496, 'init_value': -407.1195373535156, 'ave_value': -150.8881877528923} step=13680
2022-04-20 15:58.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.14 [info     ] CQL_20220420155202: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003194906558209693, 'time_algorithm_update': 0.024291800476654232, 'temp_loss': -0.06520324667686955, 'temp': 1.039043526551877, 'alpha_loss': 11.270300461534868, 'alpha': 0.2993748859006759, 'critic_loss': 2532.4082623755025, 'actor_loss': 163.44261385822853, 'time_step': 0.024703080194038256, 'td_error': 1391.7827184170717, 'init_value': -419.9081115722656, 'ave_value': -156.33303836687324} step=14022
2022-04-20 15:58.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.23 [info     ] CQL_20220420155202: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003410556860137404, 'time_algorithm_update': 0.025597972479480053, 'temp_loss': 0.07099463846207711, 'temp': 1.0385160191714415, 'alpha_loss': 9.290155772577252, 'alpha': 0.28878919632114164, 'critic_loss': 2595.5354257327076, 'actor_loss': 165.35082931964718, 'time_step': 0.02603915211750053, 'td_error': 471.8380890369469, 'init_value': -403.33392333984375, 'ave_value': -153.68475113280118} step=14364
2022-04-20 15:58.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.33 [info     ] CQL_20220420155202: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003400044134485791, 'time_algorithm_update': 0.025326779014185855, 'temp_loss': 0.12762696490955283, 'temp': 1.0311936087775648, 'alpha_loss': 5.496208281893479, 'alpha': 0.28186694641559445, 'critic_loss': 2600.2359158699974, 'actor_loss': 164.35078287403485, 'time_step': 0.025763890896624293, 'td_error': 605.8824359459989, 'init_value': -407.82958984375, 'ave_value': -153.5585703184006} step=14706
2022-04-20 15:58.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.42 [info     ] CQL_20220420155202: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003425649732177021, 'time_algorithm_update': 0.02561707245676141, 'temp_loss': 0.1517990450456477, 'temp': 1.0221288110080518, 'alpha_loss': 5.640059998160915, 'alpha': 0.27592723899417454, 'critic_loss': 2581.6223615679824, 'actor_loss': 163.79602635255333, 'time_step': 0.026056984711808766, 'td_error': 500.8544696795207, 'init_value': -405.13555908203125, 'ave_value': -154.32285509493704} step=15048
2022-04-20 15:58.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:58.51 [info     ] CQL_20220420155202: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003397890001709698, 'time_algorithm_update': 0.025373410760310657, 'temp_loss': 0.18492048754416712, 'temp': 1.0125206956389354, 'alpha_loss': 5.371627567804348, 'alpha': 0.2695496990318187, 'critic_loss': 2572.6774795264528, 'actor_loss': 163.56047067028737, 'time_step': 0.025812251526012756, 'td_error': 573.4857298444767, 'init_value': -399.83123779296875, 'ave_value': -154.25402021707714} step=15390
2022-04-20 15:58.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.00 [info     ] CQL_20220420155202: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00033549816287748997, 'time_algorithm_update': 0.025493000683031584, 'temp_loss': 0.11896227480440635, 'temp': 1.0033076231242621, 'alpha_loss': 4.8228595563542775, 'alpha': 0.2633311568993574, 'critic_loss': 2555.6343001873174, 'actor_loss': 162.68744106181182, 'time_step': 0.02592399733805517, 'td_error': 461.7068575697748, 'init_value': -389.283935546875, 'ave_value': -152.13743100506764} step=15732
2022-04-20 15:59.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.10 [info     ] CQL_20220420155202: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00033821836549636217, 'time_algorithm_update': 0.024919014228017705, 'temp_loss': 0.14618785495799014, 'temp': 0.9958021067387877, 'alpha_loss': 5.53786157446298, 'alpha': 0.25673193384332266, 'critic_loss': 2539.740408914131, 'actor_loss': 162.67906876056514, 'time_step': 0.025357705807825277, 'td_error': 670.2453804952137, 'init_value': -394.56121826171875, 'ave_value': -152.73968227060664} step=16074
2022-04-20 15:59.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.18 [info     ] CQL_20220420155202: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033949202264261526, 'time_algorithm_update': 0.024147431055704754, 'temp_loss': 0.1470301176764463, 'temp': 0.9872398641374376, 'alpha_loss': 4.879768151985972, 'alpha': 0.24960461202246403, 'critic_loss': 2537.1131252712676, 'actor_loss': 162.51210985685648, 'time_step': 0.024587655625148128, 'td_error': 377.86179121215235, 'init_value': -381.33123779296875, 'ave_value': -151.2932352349068} step=16416
2022-04-20 15:59.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.27 [info     ] CQL_20220420155202: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003446807638246414, 'time_algorithm_update': 0.02407736875857526, 'temp_loss': 0.16081333293290864, 'temp': 0.977411097776123, 'alpha_loss': 4.4463873906442295, 'alpha': 0.24326488018384454, 'critic_loss': 2519.3694761398942, 'actor_loss': 161.91178465726082, 'time_step': 0.024520184561523082, 'td_error': 324.21698027420763, 'init_value': -383.2672424316406, 'ave_value': -153.26386118906038} step=16758
2022-04-20 15:59.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 15:59.36 [info     ] CQL_20220420155202: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003423237661172075, 'time_algorithm_update': 0.02410922831262064, 'temp_loss': 0.129804938917228, 'temp': 0.9691791621565121, 'alpha_loss': 5.244708520627161, 'alpha': 0.2367695057165553, 'critic_loss': 2497.82006122076, 'actor_loss': 161.47780573437785, 'time_step': 0.024547163506000363, 'td_error': 562.136969110748, 'init_value': -370.22998046875, 'ave_value': -148.51863918727716} step=17100
2022-04-20 15:59.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420155202/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910049e

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 15:59.37 [info     ] FQE_20220420155936: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001571293336799346, 'time_algorithm_update': 0.0036234956189810513, 'loss': 0.0050541822446498975, 'time_step': 0.0038554984402943806, 'init_value': -0.23958680033683777, 'ave_value': -0.21287006742849543, 'soft_opc': nan} step=166




2022-04-20 15:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.37 [info     ] FQE_20220420155936: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015028700771101987, 'time_algorithm_update': 0.0034943632332675428, 'loss': 0.0039951779840356975, 'time_step': 0.0037168206938778064, 'init_value': -0.3168467581272125, 'ave_value': -0.2593071951069408, 'soft_opc': nan} step=332




2022-04-20 15:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.38 [info     ] FQE_20220420155936: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015171895544212986, 'time_algorithm_update': 0.003505466932273773, 'loss': 0.003526286649831896, 'time_step': 0.0037267768239400475, 'init_value': -0.4169902801513672, 'ave_value': -0.33273601019503296, 'soft_opc': nan} step=498




2022-04-20 15:59.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.39 [info     ] FQE_20220420155936: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015305323773119823, 'time_algorithm_update': 0.0035224994981145285, 'loss': 0.0035219607041610114, 'time_step': 0.0037404744021863824, 'init_value': -0.5089678168296814, 'ave_value': -0.3948588451219572, 'soft_opc': nan} step=664




2022-04-20 15:59.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.40 [info     ] FQE_20220420155936: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015075809984322055, 'time_algorithm_update': 0.0035230108054287463, 'loss': 0.003416492884238082, 'time_step': 0.0037460197885352447, 'init_value': -0.5476579666137695, 'ave_value': -0.4041983925678709, 'soft_opc': nan} step=830




2022-04-20 15:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.40 [info     ] FQE_20220420155936: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015200189797275038, 'time_algorithm_update': 0.003523592489311494, 'loss': 0.003516176512818335, 'time_step': 0.003743875457579831, 'init_value': -0.5684852600097656, 'ave_value': -0.4142509826249233, 'soft_opc': nan} step=996




2022-04-20 15:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.41 [info     ] FQE_20220420155936: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015151500701904297, 'time_algorithm_update': 0.00347426138728498, 'loss': 0.003511742192197097, 'time_step': 0.0036965651684496776, 'init_value': -0.6682817339897156, 'ave_value': -0.49776249443692666, 'soft_opc': nan} step=1162




2022-04-20 15:59.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.42 [info     ] FQE_20220420155936: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015391930040106717, 'time_algorithm_update': 0.003500409873135119, 'loss': 0.0036524067533269793, 'time_step': 0.003726167851183788, 'init_value': -0.71448814868927, 'ave_value': -0.52807368690836, 'soft_opc': nan} step=1328




2022-04-20 15:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.42 [info     ] FQE_20220420155936: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014977282788380082, 'time_algorithm_update': 0.003456645701304976, 'loss': 0.003548471437323928, 'time_step': 0.003675410546452166, 'init_value': -0.7200172543525696, 'ave_value': -0.5240240966146057, 'soft_opc': nan} step=1494




2022-04-20 15:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.43 [info     ] FQE_20220420155936: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015442917145878435, 'time_algorithm_update': 0.003580265734569136, 'loss': 0.003609616676884064, 'time_step': 0.0038053357457540123, 'init_value': -0.7675719857215881, 'ave_value': -0.5373153144074184, 'soft_opc': nan} step=1660




2022-04-20 15:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.44 [info     ] FQE_20220420155936: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001564514206116458, 'time_algorithm_update': 0.00354069686797728, 'loss': 0.0038573310706281967, 'time_step': 0.003769697913204331, 'init_value': -0.8063735961914062, 'ave_value': -0.556861023265902, 'soft_opc': nan} step=1826




2022-04-20 15:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.44 [info     ] FQE_20220420155936: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015550061880824077, 'time_algorithm_update': 0.003498131970325148, 'loss': 0.0038269429189904808, 'time_step': 0.0037247373397091784, 'init_value': -0.8604981303215027, 'ave_value': -0.5889223366371683, 'soft_opc': nan} step=1992




2022-04-20 15:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.45 [info     ] FQE_20220420155936: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001536306128444442, 'time_algorithm_update': 0.003408200769539339, 'loss': 0.0039379700677503035, 'time_step': 0.003628917487270861, 'init_value': -0.9092816114425659, 'ave_value': -0.6033413314638105, 'soft_opc': nan} step=2158




2022-04-20 15:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.46 [info     ] FQE_20220420155936: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015146473804152156, 'time_algorithm_update': 0.003360248473753412, 'loss': 0.0039865673345749275, 'time_step': 0.003579699849507895, 'init_value': -0.9639230370521545, 'ave_value': -0.6372539600139265, 'soft_opc': nan} step=2324




2022-04-20 15:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.46 [info     ] FQE_20220420155936: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015394371676157755, 'time_algorithm_update': 0.0034318228802049018, 'loss': 0.004154081553843501, 'time_step': 0.003657052315861346, 'init_value': -1.0109798908233643, 'ave_value': -0.6586792994794007, 'soft_opc': nan} step=2490




2022-04-20 15:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.47 [info     ] FQE_20220420155936: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001537900373160121, 'time_algorithm_update': 0.0034591318613075347, 'loss': 0.004262621089513133, 'time_step': 0.0036851684731173226, 'init_value': -1.0779451131820679, 'ave_value': -0.6970348028113713, 'soft_opc': nan} step=2656




2022-04-20 15:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.48 [info     ] FQE_20220420155936: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015249453395246024, 'time_algorithm_update': 0.003414123891347862, 'loss': 0.004436037295598941, 'time_step': 0.0036376915782330983, 'init_value': -1.1711876392364502, 'ave_value': -0.7593441531934717, 'soft_opc': nan} step=2822




2022-04-20 15:59.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.48 [info     ] FQE_20220420155936: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014805362885256848, 'time_algorithm_update': 0.0034573480307337747, 'loss': 0.004638227729508317, 'time_step': 0.0036750730261745222, 'init_value': -1.214592456817627, 'ave_value': -0.7651999888052274, 'soft_opc': nan} step=2988




2022-04-20 15:59.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.49 [info     ] FQE_20220420155936: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015302594885768662, 'time_algorithm_update': 0.0034502241984907403, 'loss': 0.004887135594196797, 'time_step': 0.0036753416061401367, 'init_value': -1.3412225246429443, 'ave_value': -0.8483172246934594, 'soft_opc': nan} step=3154




2022-04-20 15:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.50 [info     ] FQE_20220420155936: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015575339995234846, 'time_algorithm_update': 0.0034912092139921993, 'loss': 0.005182629896479334, 'time_step': 0.003717702555369182, 'init_value': -1.4011292457580566, 'ave_value': -0.8687732202177112, 'soft_opc': nan} step=3320




2022-04-20 15:59.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.50 [info     ] FQE_20220420155936: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015374407710799253, 'time_algorithm_update': 0.0034639217767370753, 'loss': 0.005504319301935429, 'time_step': 0.003689233079014054, 'init_value': -1.4763555526733398, 'ave_value': -0.900329925583021, 'soft_opc': nan} step=3486




2022-04-20 15:59.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.51 [info     ] FQE_20220420155936: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001512047756149108, 'time_algorithm_update': 0.002607414521366717, 'loss': 0.005753660709905741, 'time_step': 0.002826061593480857, 'init_value': -1.593550205230713, 'ave_value': -0.9659552126183166, 'soft_opc': nan} step=3652




2022-04-20 15:59.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.52 [info     ] FQE_20220420155936: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016054187912538828, 'time_algorithm_update': 0.0034683382654764564, 'loss': 0.006116179792148745, 'time_step': 0.003701883626271443, 'init_value': -1.7442142963409424, 'ave_value': -1.0543981467751233, 'soft_opc': nan} step=3818




2022-04-20 15:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.52 [info     ] FQE_20220420155936: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001534381544733622, 'time_algorithm_update': 0.003512636724724827, 'loss': 0.006549455406402339, 'time_step': 0.0037328392626291298, 'init_value': -1.7781331539154053, 'ave_value': -1.0288836598262057, 'soft_opc': nan} step=3984




2022-04-20 15:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.53 [info     ] FQE_20220420155936: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015746685395757836, 'time_algorithm_update': 0.0033352274492562534, 'loss': 0.006720821168385895, 'time_step': 0.003566723272024867, 'init_value': -1.8974637985229492, 'ave_value': -1.0782710210495703, 'soft_opc': nan} step=4150




2022-04-20 15:59.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.54 [info     ] FQE_20220420155936: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015404999974262282, 'time_algorithm_update': 0.0035270553037344693, 'loss': 0.007111052635367062, 'time_step': 0.003753939306879618, 'init_value': -2.002481460571289, 'ave_value': -1.1491124455053527, 'soft_opc': nan} step=4316




2022-04-20 15:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.54 [info     ] FQE_20220420155936: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015224893409085562, 'time_algorithm_update': 0.0033664516655795545, 'loss': 0.007465151147019252, 'time_step': 0.0035876940531903005, 'init_value': -2.0421082973480225, 'ave_value': -1.1639673655835894, 'soft_opc': nan} step=4482




2022-04-20 15:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.55 [info     ] FQE_20220420155936: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015797241624579373, 'time_algorithm_update': 0.003555748836103692, 'loss': 0.007485859326777014, 'time_step': 0.003784597638141678, 'init_value': -2.125171184539795, 'ave_value': -1.2025249869571075, 'soft_opc': nan} step=4648




2022-04-20 15:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.56 [info     ] FQE_20220420155936: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001560751214084855, 'time_algorithm_update': 0.00332657974886607, 'loss': 0.007805019679496132, 'time_step': 0.003554394446223615, 'init_value': -2.2358829975128174, 'ave_value': -1.2582741435516525, 'soft_opc': nan} step=4814




2022-04-20 15:59.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.56 [info     ] FQE_20220420155936: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001558783542679017, 'time_algorithm_update': 0.003579108111829643, 'loss': 0.008128334257221523, 'time_step': 0.003806236278579896, 'init_value': -2.2822375297546387, 'ave_value': -1.2807790577478775, 'soft_opc': nan} step=4980




2022-04-20 15:59.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.57 [info     ] FQE_20220420155936: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015532970428466797, 'time_algorithm_update': 0.003402981413416116, 'loss': 0.008705286313330823, 'time_step': 0.0036259903965226137, 'init_value': -2.3411412239074707, 'ave_value': -1.2849622572528887, 'soft_opc': nan} step=5146




2022-04-20 15:59.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.58 [info     ] FQE_20220420155936: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001524557550269437, 'time_algorithm_update': 0.0035341245582304805, 'loss': 0.008577000701109913, 'time_step': 0.003757735332810735, 'init_value': -2.4822731018066406, 'ave_value': -1.3781085677512057, 'soft_opc': nan} step=5312




2022-04-20 15:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.58 [info     ] FQE_20220420155936: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015584244785538638, 'time_algorithm_update': 0.0034029311444385944, 'loss': 0.00895927898818627, 'time_step': 0.0036254704716693923, 'init_value': -2.532957077026367, 'ave_value': -1.3776004364965735, 'soft_opc': nan} step=5478




2022-04-20 15:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 15:59.59 [info     ] FQE_20220420155936: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015578068882586007, 'time_algorithm_update': 0.0034854182277817324, 'loss': 0.009358606595091567, 'time_step': 0.0037113499928669758, 'init_value': -2.5977063179016113, 'ave_value': -1.413786038031449, 'soft_opc': nan} step=5644




2022-04-20 15:59.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.00 [info     ] FQE_20220420155936: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015331320015780897, 'time_algorithm_update': 0.0034091731151902533, 'loss': 0.009619935126049467, 'time_step': 0.003634178494832602, 'init_value': -2.723487377166748, 'ave_value': -1.496927563007082, 'soft_opc': nan} step=5810




2022-04-20 16:00.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.00 [info     ] FQE_20220420155936: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015889592917568712, 'time_algorithm_update': 0.0034770189997661545, 'loss': 0.010101300804505506, 'time_step': 0.0037057744451316006, 'init_value': -2.735466957092285, 'ave_value': -1.4774482074084583, 'soft_opc': nan} step=5976




2022-04-20 16:00.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.01 [info     ] FQE_20220420155936: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001532069171767637, 'time_algorithm_update': 0.003421534974891019, 'loss': 0.010444245251745591, 'time_step': 0.003646886492350015, 'init_value': -2.8686699867248535, 'ave_value': -1.5709955665308075, 'soft_opc': nan} step=6142




2022-04-20 16:00.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.02 [info     ] FQE_20220420155936: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015496345887701195, 'time_algorithm_update': 0.003520339368337608, 'loss': 0.010706642646474359, 'time_step': 0.0037442474480134896, 'init_value': -2.956279754638672, 'ave_value': -1.6036157542252325, 'soft_opc': nan} step=6308




2022-04-20 16:00.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.02 [info     ] FQE_20220420155936: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015268411981054098, 'time_algorithm_update': 0.0034228319145110718, 'loss': 0.011164320831282446, 'time_step': 0.0036443874060389505, 'init_value': -3.080855131149292, 'ave_value': -1.6923487545200833, 'soft_opc': nan} step=6474




2022-04-20 16:00.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.03 [info     ] FQE_20220420155936: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015514729970909027, 'time_algorithm_update': 0.0035093247172344163, 'loss': 0.011764536615242306, 'time_step': 0.003734166363635695, 'init_value': -3.1368212699890137, 'ave_value': -1.7112557439183866, 'soft_opc': nan} step=6640




2022-04-20 16:00.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.04 [info     ] FQE_20220420155936: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001600837133016931, 'time_algorithm_update': 0.003340741237962102, 'loss': 0.011729524705098965, 'time_step': 0.0035713437091873354, 'init_value': -3.1672964096069336, 'ave_value': -1.7253180308049327, 'soft_opc': nan} step=6806




2022-04-20 16:00.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.05 [info     ] FQE_20220420155936: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015394515301807817, 'time_algorithm_update': 0.0035309762839811393, 'loss': 0.012150198172817164, 'time_step': 0.003757002841995423, 'init_value': -3.2717952728271484, 'ave_value': -1.7903150991410823, 'soft_opc': nan} step=6972




2022-04-20 16:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.05 [info     ] FQE_20220420155936: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016028191669877753, 'time_algorithm_update': 0.0034906418926744574, 'loss': 0.012447113911789584, 'time_step': 0.0037239186735038296, 'init_value': -3.3566508293151855, 'ave_value': -1.8308227341991288, 'soft_opc': nan} step=7138




2022-04-20 16:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.06 [info     ] FQE_20220420155936: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015643274927713783, 'time_algorithm_update': 0.003429908350289586, 'loss': 0.012586996951967994, 'time_step': 0.0036557424499327877, 'init_value': -3.419158697128296, 'ave_value': -1.8746870401072073, 'soft_opc': nan} step=7304




2022-04-20 16:00.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.07 [info     ] FQE_20220420155936: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015816200210387447, 'time_algorithm_update': 0.0034494213311068982, 'loss': 0.012944959788527116, 'time_step': 0.003677997244409768, 'init_value': -3.445831775665283, 'ave_value': -1.8998355657264994, 'soft_opc': nan} step=7470




2022-04-20 16:00.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.07 [info     ] FQE_20220420155936: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015768947371517318, 'time_algorithm_update': 0.0035395852054458067, 'loss': 0.01355926543319912, 'time_step': 0.003767510494553899, 'init_value': -3.6001157760620117, 'ave_value': -2.0078115090727806, 'soft_opc': nan} step=7636




2022-04-20 16:00.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.08 [info     ] FQE_20220420155936: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015540726213570102, 'time_algorithm_update': 0.003511003701083631, 'loss': 0.014127689343596617, 'time_step': 0.00373575055455587, 'init_value': -3.618070363998413, 'ave_value': -2.0021056632737855, 'soft_opc': nan} step=7802




2022-04-20 16:00.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.09 [info     ] FQE_20220420155936: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001561928944415357, 'time_algorithm_update': 0.003575704183923193, 'loss': 0.013999940729307303, 'time_step': 0.003802198961556676, 'init_value': -3.681366443634033, 'ave_value': -2.058397335364475, 'soft_opc': nan} step=7968




2022-04-20 16:00.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.09 [info     ] FQE_20220420155936: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015539720834019673, 'time_algorithm_update': 0.003375511571585414, 'loss': 0.014530795812186033, 'time_step': 0.0035994067249527895, 'init_value': -3.7472639083862305, 'ave_value': -2.069851685785227, 'soft_opc': nan} step=8134




2022-04-20 16:00.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:00.10 [info     ] FQE_20220420155936: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001578589519822454, 'time_algorithm_update': 0.003496882427169616, 'loss': 0.014653143571852425, 'time_step': 0.0037314762552100493, 'init_value': -3.805365562438965, 'ave_value': -2.128648778245793, 'soft_opc': nan} step=8300




2022-04-20 16:00.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420155936/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:00.10 [info     ] Directory is created at d3rlpy_logs/FQE_20220420160010
2022-04-20 16:00.10 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:00.10 [debug    ] Building models...
2022-04-20 16:00.10 [debug    ] Models have been built.
2022-04-20 16:00.10 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420160010/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:00.12 [info     ] FQE_20220420160010: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001581096371939016, 'time_algorithm_update': 0.0034215443356092586, 'loss': 0.024100968998661914, 'time_step': 0.003651237765023875, 'init_value': -1.2711458206176758, 'ave_value': -1.2422740482263737, 'soft_opc': nan} step=344




2022-04-20 16:00.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.13 [info     ] FQE_20220420160010: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001577097316120946, 'time_algorithm_update': 0.003459089024122371, 'loss': 0.022916531877938743, 'time_step': 0.0036865618339804716, 'init_value': -2.0892856121063232, 'ave_value': -2.0081214523664466, 'soft_opc': nan} step=688




2022-04-20 16:00.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.14 [info     ] FQE_20220420160010: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015937034473862758, 'time_algorithm_update': 0.0034997366195501285, 'loss': 0.025868037328405608, 'time_step': 0.003731908493263777, 'init_value': -3.064213991165161, 'ave_value': -2.928451614498018, 'soft_opc': nan} step=1032




2022-04-20 16:00.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.16 [info     ] FQE_20220420160010: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016089650087578353, 'time_algorithm_update': 0.0034778499326040577, 'loss': 0.029207414679424188, 'time_step': 0.003713874622832897, 'init_value': -3.6785731315612793, 'ave_value': -3.533780812505666, 'soft_opc': nan} step=1376




2022-04-20 16:00.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.17 [info     ] FQE_20220420160010: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001606636269148006, 'time_algorithm_update': 0.0034739784030027167, 'loss': 0.03598185390948729, 'time_step': 0.0037066874115966085, 'init_value': -4.492989540100098, 'ave_value': -4.367399123116388, 'soft_opc': nan} step=1720




2022-04-20 16:00.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.19 [info     ] FQE_20220420160010: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016226671462835266, 'time_algorithm_update': 0.003439636424530384, 'loss': 0.043394100335210045, 'time_step': 0.003675490617752075, 'init_value': -4.998096466064453, 'ave_value': -4.952909054459484, 'soft_opc': nan} step=2064




2022-04-20 16:00.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.20 [info     ] FQE_20220420160010: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015906608381936716, 'time_algorithm_update': 0.0034305737462154654, 'loss': 0.053506507605942354, 'time_step': 0.003661706004031869, 'init_value': -5.622844696044922, 'ave_value': -5.7513887631127965, 'soft_opc': nan} step=2408




2022-04-20 16:00.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.21 [info     ] FQE_20220420160010: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001583723134772722, 'time_algorithm_update': 0.0034871919210567033, 'loss': 0.0643649680680747, 'time_step': 0.003720721533132154, 'init_value': -6.020578384399414, 'ave_value': -6.389291621972848, 'soft_opc': nan} step=2752




2022-04-20 16:00.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.23 [info     ] FQE_20220420160010: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016145234884217728, 'time_algorithm_update': 0.003461116968199264, 'loss': 0.07685345521759848, 'time_step': 0.0036991869294366172, 'init_value': -6.481346607208252, 'ave_value': -7.06610008040527, 'soft_opc': nan} step=3096




2022-04-20 16:00.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.24 [info     ] FQE_20220420160010: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016285097876260447, 'time_algorithm_update': 0.003478032211924708, 'loss': 0.09195748707929323, 'time_step': 0.003714201062224632, 'init_value': -6.625678539276123, 'ave_value': -7.487704491541461, 'soft_opc': nan} step=3440




2022-04-20 16:00.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.26 [info     ] FQE_20220420160010: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016035797984101052, 'time_algorithm_update': 0.003528436256009479, 'loss': 0.11072272377164472, 'time_step': 0.0037641996561094773, 'init_value': -7.323152542114258, 'ave_value': -8.362179984098619, 'soft_opc': nan} step=3784




2022-04-20 16:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.27 [info     ] FQE_20220420160010: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001574983430463214, 'time_algorithm_update': 0.0034463343232177023, 'loss': 0.12755486208317413, 'time_step': 0.003677662028822788, 'init_value': -7.784997463226318, 'ave_value': -9.012680480082158, 'soft_opc': nan} step=4128




2022-04-20 16:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.28 [info     ] FQE_20220420160010: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016014520512070766, 'time_algorithm_update': 0.003421338491661604, 'loss': 0.14696863934744236, 'time_step': 0.0036541403726089833, 'init_value': -8.215988159179688, 'ave_value': -9.56016501043294, 'soft_opc': nan} step=4472




2022-04-20 16:00.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.30 [info     ] FQE_20220420160010: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015983817189238792, 'time_algorithm_update': 0.003437222436416981, 'loss': 0.1660228738896982, 'time_step': 0.0036662532839664193, 'init_value': -8.484363555908203, 'ave_value': -9.958015193501362, 'soft_opc': nan} step=4816




2022-04-20 16:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.31 [info     ] FQE_20220420160010: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001582357772561007, 'time_algorithm_update': 0.0034698178601819413, 'loss': 0.1812981146030388, 'time_step': 0.0036965740281482075, 'init_value': -8.670211791992188, 'ave_value': -10.194977943900435, 'soft_opc': nan} step=5160




2022-04-20 16:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.33 [info     ] FQE_20220420160010: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016258414401564488, 'time_algorithm_update': 0.0034553339315015214, 'loss': 0.20161760281663127, 'time_step': 0.003691141688546469, 'init_value': -9.232912063598633, 'ave_value': -10.857747216796007, 'soft_opc': nan} step=5504




2022-04-20 16:00.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.34 [info     ] FQE_20220420160010: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016290711802105571, 'time_algorithm_update': 0.003505314505377481, 'loss': 0.22604530014985696, 'time_step': 0.003742206928341888, 'init_value': -9.48902702331543, 'ave_value': -11.08355691630041, 'soft_opc': nan} step=5848




2022-04-20 16:00.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.35 [info     ] FQE_20220420160010: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016032055366870969, 'time_algorithm_update': 0.003460583298705345, 'loss': 0.24915871923507818, 'time_step': 0.003691463969474615, 'init_value': -10.077842712402344, 'ave_value': -11.64874146808587, 'soft_opc': nan} step=6192




2022-04-20 16:00.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.37 [info     ] FQE_20220420160010: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016030877135520758, 'time_algorithm_update': 0.0035119022047796914, 'loss': 0.2689422792646774, 'time_step': 0.003742234651432481, 'init_value': -10.809134483337402, 'ave_value': -12.260972546156482, 'soft_opc': nan} step=6536




2022-04-20 16:00.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.38 [info     ] FQE_20220420160010: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016008490739866744, 'time_algorithm_update': 0.003486988156340843, 'loss': 0.29131935667354875, 'time_step': 0.003719830928846847, 'init_value': -11.090450286865234, 'ave_value': -12.511862206188459, 'soft_opc': nan} step=6880




2022-04-20 16:00.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.39 [info     ] FQE_20220420160010: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016013966050258902, 'time_algorithm_update': 0.003477042497590531, 'loss': 0.3062172642865673, 'time_step': 0.00371072112127792, 'init_value': -11.616438865661621, 'ave_value': -12.965367199152107, 'soft_opc': nan} step=7224




2022-04-20 16:00.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.41 [info     ] FQE_20220420160010: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001624420631763547, 'time_algorithm_update': 0.0034302084944969, 'loss': 0.3182770584244281, 'time_step': 0.003666890221972798, 'init_value': -11.531553268432617, 'ave_value': -12.702656753926625, 'soft_opc': nan} step=7568




2022-04-20 16:00.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.42 [info     ] FQE_20220420160010: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016140383343363917, 'time_algorithm_update': 0.0034302507722100547, 'loss': 0.3430097077669966, 'time_step': 0.0036662962547568388, 'init_value': -12.341211318969727, 'ave_value': -13.589310465310858, 'soft_opc': nan} step=7912




2022-04-20 16:00.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.44 [info     ] FQE_20220420160010: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015909588614175486, 'time_algorithm_update': 0.0034771048745443653, 'loss': 0.356956145520387, 'time_step': 0.0037090875381647153, 'init_value': -12.531157493591309, 'ave_value': -13.713590633351071, 'soft_opc': nan} step=8256




2022-04-20 16:00.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.45 [info     ] FQE_20220420160010: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001606774884600972, 'time_algorithm_update': 0.00349005987477857, 'loss': 0.3796795716206002, 'time_step': 0.003722801458003909, 'init_value': -13.10291576385498, 'ave_value': -14.272448865212459, 'soft_opc': nan} step=8600




2022-04-20 16:00.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.46 [info     ] FQE_20220420160010: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016026164210119912, 'time_algorithm_update': 0.0034306742424188657, 'loss': 0.38662092264221853, 'time_step': 0.0036643868268922317, 'init_value': -13.50922679901123, 'ave_value': -14.794176570252255, 'soft_opc': nan} step=8944




2022-04-20 16:00.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.48 [info     ] FQE_20220420160010: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015907440074654512, 'time_algorithm_update': 0.003488195496936177, 'loss': 0.39335494033104285, 'time_step': 0.003721475601196289, 'init_value': -13.659924507141113, 'ave_value': -14.935033188708733, 'soft_opc': nan} step=9288




2022-04-20 16:00.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.49 [info     ] FQE_20220420160010: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001620442368263422, 'time_algorithm_update': 0.0034477024577384773, 'loss': 0.4085977365273636, 'time_step': 0.0036826002043347027, 'init_value': -14.288806915283203, 'ave_value': -15.667470916999957, 'soft_opc': nan} step=9632




2022-04-20 16:00.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.51 [info     ] FQE_20220420160010: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001594736132510873, 'time_algorithm_update': 0.0034564657266749894, 'loss': 0.42247139086383717, 'time_step': 0.003690639207529467, 'init_value': -14.454151153564453, 'ave_value': -15.903869105436103, 'soft_opc': nan} step=9976




2022-04-20 16:00.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.52 [info     ] FQE_20220420160010: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015955816867739656, 'time_algorithm_update': 0.0034701533095781193, 'loss': 0.43379717441492305, 'time_step': 0.0037025060764578886, 'init_value': -14.830278396606445, 'ave_value': -16.264844598119968, 'soft_opc': nan} step=10320




2022-04-20 16:00.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.53 [info     ] FQE_20220420160010: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001620393852854884, 'time_algorithm_update': 0.0035216122172599617, 'loss': 0.43778638975413214, 'time_step': 0.0037582426570182625, 'init_value': -14.82166862487793, 'ave_value': -16.40526487324493, 'soft_opc': nan} step=10664




2022-04-20 16:00.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.55 [info     ] FQE_20220420160010: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001647978327995123, 'time_algorithm_update': 0.0033806001031121543, 'loss': 0.4525526309125992, 'time_step': 0.003620673750722131, 'init_value': -15.229015350341797, 'ave_value': -16.98331358065616, 'soft_opc': nan} step=11008




2022-04-20 16:00.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.56 [info     ] FQE_20220420160010: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015972797260727992, 'time_algorithm_update': 0.0034826072149498517, 'loss': 0.45499230575812766, 'time_step': 0.003715441670528678, 'init_value': -15.363382339477539, 'ave_value': -17.239203138196387, 'soft_opc': nan} step=11352




2022-04-20 16:00.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.58 [info     ] FQE_20220420160010: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016310949658238612, 'time_algorithm_update': 0.0034856692303058714, 'loss': 0.4675069519340299, 'time_step': 0.0037221742230792378, 'init_value': -15.450553894042969, 'ave_value': -17.480645887632402, 'soft_opc': nan} step=11696




2022-04-20 16:00.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:00.59 [info     ] FQE_20220420160010: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016053332838901255, 'time_algorithm_update': 0.003419975901758948, 'loss': 0.4788704023988874, 'time_step': 0.0036541715610859007, 'init_value': -15.345285415649414, 'ave_value': -17.70345718435385, 'soft_opc': nan} step=12040




2022-04-20 16:00.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.00 [info     ] FQE_20220420160010: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016229305156441622, 'time_algorithm_update': 0.0035028575464736583, 'loss': 0.47659667048516663, 'time_step': 0.003739198973012525, 'init_value': -15.428947448730469, 'ave_value': -17.97605439524485, 'soft_opc': nan} step=12384




2022-04-20 16:01.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.02 [info     ] FQE_20220420160010: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001611349194548851, 'time_algorithm_update': 0.0035047253897023756, 'loss': 0.4829968988982051, 'time_step': 0.003743268029634343, 'init_value': -15.46631145477295, 'ave_value': -18.20274071760333, 'soft_opc': nan} step=12728




2022-04-20 16:01.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.03 [info     ] FQE_20220420160010: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016461070193800816, 'time_algorithm_update': 0.003929673239242199, 'loss': 0.4955191526475341, 'time_step': 0.004169247178144233, 'init_value': -15.519754409790039, 'ave_value': -18.373067831665672, 'soft_opc': nan} step=13072




2022-04-20 16:01.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.05 [info     ] FQE_20220420160010: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016463080117868823, 'time_algorithm_update': 0.004126950058826181, 'loss': 0.5046325321904882, 'time_step': 0.0043652549732563104, 'init_value': -15.624095916748047, 'ave_value': -18.699671931245952, 'soft_opc': nan} step=13416




2022-04-20 16:01.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.07 [info     ] FQE_20220420160010: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001616027466086454, 'time_algorithm_update': 0.004465773355129154, 'loss': 0.5033036539612641, 'time_step': 0.004701304574345433, 'init_value': -15.06856918334961, 'ave_value': -18.356066876317602, 'soft_opc': nan} step=13760




2022-04-20 16:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.08 [info     ] FQE_20220420160010: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016214126764341842, 'time_algorithm_update': 0.00401669047599615, 'loss': 0.5131315207108855, 'time_step': 0.004252568233844845, 'init_value': -15.397180557250977, 'ave_value': -18.962941367374125, 'soft_opc': nan} step=14104




2022-04-20 16:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.10 [info     ] FQE_20220420160010: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016645151515339696, 'time_algorithm_update': 0.004462194997210836, 'loss': 0.5154056769178443, 'time_step': 0.004702830730482589, 'init_value': -15.168079376220703, 'ave_value': -18.932350123033498, 'soft_opc': nan} step=14448




2022-04-20 16:01.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.11 [info     ] FQE_20220420160010: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016382405924242596, 'time_algorithm_update': 0.004088208425876706, 'loss': 0.5198460342145936, 'time_step': 0.004328751286794973, 'init_value': -15.11187744140625, 'ave_value': -18.977909671325186, 'soft_opc': nan} step=14792




2022-04-20 16:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.13 [info     ] FQE_20220420160010: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016936382582021314, 'time_algorithm_update': 0.004517248203588086, 'loss': 0.513605953161713, 'time_step': 0.004763574794281361, 'init_value': -14.958788871765137, 'ave_value': -19.026609298204693, 'soft_opc': nan} step=15136




2022-04-20 16:01.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.15 [info     ] FQE_20220420160010: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016446723494418833, 'time_algorithm_update': 0.004166163677393004, 'loss': 0.5151165885861616, 'time_step': 0.004406768915265105, 'init_value': -14.989370346069336, 'ave_value': -19.01559039400555, 'soft_opc': nan} step=15480




2022-04-20 16:01.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.16 [info     ] FQE_20220420160010: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016180165978365167, 'time_algorithm_update': 0.004135163717491682, 'loss': 0.5179634858301819, 'time_step': 0.004372293865957925, 'init_value': -14.924827575683594, 'ave_value': -19.314959911543976, 'soft_opc': nan} step=15824




2022-04-20 16:01.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.18 [info     ] FQE_20220420160010: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016517140144525574, 'time_algorithm_update': 0.004494906164879023, 'loss': 0.5086020146778156, 'time_step': 0.00473677072414132, 'init_value': -14.763107299804688, 'ave_value': -19.375599462093433, 'soft_opc': nan} step=16168




2022-04-20 16:01.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.20 [info     ] FQE_20220420160010: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016368613686672476, 'time_algorithm_update': 0.004044568815896678, 'loss': 0.49741108973001585, 'time_step': 0.004283346409021422, 'init_value': -14.984685897827148, 'ave_value': -19.738082457534855, 'soft_opc': nan} step=16512




2022-04-20 16:01.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.22 [info     ] FQE_20220420160010: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016392940698668014, 'time_algorithm_update': 0.0044866862685181376, 'loss': 0.5004297914037611, 'time_step': 0.0047246966251107145, 'init_value': -14.586929321289062, 'ave_value': -19.436755570263376, 'soft_opc': nan} step=16856




2022-04-20 16:01.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:01.23 [info     ] FQE_20220420160010: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001623318638912467, 'time_algorithm_update': 0.0040670321431270865, 'loss': 0.493950089872923, 'time_step': 0.0043018134527428205, 'init_value': -14.599668502807617, 'ave_value': -19.601680331644058, 'soft_opc': nan} step=17200




2022-04-20 16:01.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160010/model_17200.pt
most optimal hyper params for cql at this point:  [0.006478216278601315, 0.006419595903539504, 6.329045586121242e-05, 3]
search iteration:  4
using hyper params:  [0.002643098754478146, 0.0006400621272207207, 1.4556128545760968e-05, 3]
2022-04-20 16:01.23 [debug    ] RoundIterator is selected.
2022-04-20 16:01.23 [info     ] Directory is created at d3rlpy_logs/CQL_20220420160123
2022-04-20 16:01.23 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:01.23 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:01.23 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420160123/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_l

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.33 [info     ] CQL_20220420160123: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003346853088914302, 'time_algorithm_update': 0.025597771705939756, 'temp_loss': 4.835251742636252, 'temp': 0.9975193127205497, 'alpha_loss': -17.82257324073747, 'alpha': 1.017560785276848, 'critic_loss': 49.800582272267484, 'actor_loss': 0.9040618193504667, 'time_step': 0.026026672787136503, 'td_error': 2.8496812809709966, 'init_value': -2.9981932640075684, 'ave_value': -2.3316973753595676} step=342
2022-04-20 16:01.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.42 [info     ] CQL_20220420160123: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003410828740973222, 'time_algorithm_update': 0.025270727642795497, 'temp_loss': 4.566200590970223, 'temp': 0.9927301980250063, 'alpha_loss': -16.144452064358003, 'alpha': 1.052400959514038, 'critic_loss': 39.05216316312377, 'actor_loss': 2.403968065802814, 'time_step': 0.025707398938853838, 'td_error': 2.2257494449411834, 'init_value': -5.821019649505615, 'ave_value': -3.871144672752702} step=684
2022-04-20 16:01.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:01.51 [info     ] CQL_20220420160123: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003429546690823739, 'time_algorithm_update': 0.025469860835382115, 'temp_loss': 4.44356444007472, 'temp': 0.987975839982953, 'alpha_loss': -13.216641763497515, 'alpha': 1.0842594208075986, 'critic_loss': 36.248275868376794, 'actor_loss': 4.9254432247396105, 'time_step': 0.02590640804223847, 'td_error': 2.93798380668569, 'init_value': -10.101576805114746, 'ave_value': -6.267643867930784} step=1026
2022-04-20 16:01.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.00 [info     ] CQL_20220420160123: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003409831844575224, 'time_algorithm_update': 0.025454211653324597, 'temp_loss': 4.27251580723545, 'temp': 0.9832870290990461, 'alpha_loss': -11.082456558071382, 'alpha': 1.114053676002904, 'critic_loss': 33.33366948958726, 'actor_loss': 8.057705523674947, 'time_step': 0.02589282306314212, 'td_error': 3.823449363698782, 'init_value': -15.551528930664062, 'ave_value': -9.1501064137262} step=1368
2022-04-20 16:02.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.10 [info     ] CQL_20220420160123: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003454329674704033, 'time_algorithm_update': 0.025462112928691664, 'temp_loss': 4.066600390345032, 'temp': 0.9787053071964554, 'alpha_loss': -9.455908136758191, 'alpha': 1.142775887634322, 'critic_loss': 32.91970333300139, 'actor_loss': 11.3256661403946, 'time_step': 0.025902987920749954, 'td_error': 5.027555501185671, 'init_value': -20.747451782226562, 'ave_value': -11.940071136057377} step=1710
2022-04-20 16:02.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.19 [info     ] CQL_20220420160123: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00033624130382872464, 'time_algorithm_update': 0.025354608457688003, 'temp_loss': 3.865570300503781, 'temp': 0.9742370767202991, 'alpha_loss': -8.199027545270864, 'alpha': 1.1707964867179157, 'critic_loss': 35.13078470955118, 'actor_loss': 14.504945214031732, 'time_step': 0.02578553958245885, 'td_error': 6.5285034500018995, 'init_value': -25.956424713134766, 'ave_value': -14.571427800368081} step=2052
2022-04-20 16:02.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.28 [info     ] CQL_20220420160123: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035107344911809556, 'time_algorithm_update': 0.025236143703349152, 'temp_loss': 3.6817632286172164, 'temp': 0.9698572298239546, 'alpha_loss': -7.272280660986203, 'alpha': 1.1988194961296885, 'critic_loss': 38.581304304781014, 'actor_loss': 17.52678832952042, 'time_step': 0.025687182158754582, 'td_error': 8.001516748999622, 'init_value': -30.74553871154785, 'ave_value': -17.007304664821373} step=2394
2022-04-20 16:02.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.37 [info     ] CQL_20220420160123: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00033624966939290363, 'time_algorithm_update': 0.02542059254228023, 'temp_loss': 3.494398890182986, 'temp': 0.9655585142604092, 'alpha_loss': -6.425767368740505, 'alpha': 1.226900915653385, 'critic_loss': 42.22366416663454, 'actor_loss': 20.364420611955968, 'time_step': 0.025854290577403287, 'td_error': 9.884834189178461, 'init_value': -35.577857971191406, 'ave_value': -19.625629077711487} step=2736
2022-04-20 16:02.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.46 [info     ] CQL_20220420160123: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034339664972316453, 'time_algorithm_update': 0.02532689752634506, 'temp_loss': 3.357253360469439, 'temp': 0.961301126675299, 'alpha_loss': -5.7060626619740535, 'alpha': 1.2550367594462388, 'critic_loss': 46.65781288816218, 'actor_loss': 23.044416271455106, 'time_step': 0.02576623395172476, 'td_error': 11.883360843469964, 'init_value': -39.66368865966797, 'ave_value': -21.607552316322387} step=3078
2022-04-20 16:02.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:02.56 [info     ] CQL_20220420160123: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003422617215162132, 'time_algorithm_update': 0.025429123326351766, 'temp_loss': 3.207756719394037, 'temp': 0.9570869456606301, 'alpha_loss': -5.046697060267131, 'alpha': 1.283472546359949, 'critic_loss': 51.12813549153289, 'actor_loss': 25.5638898157934, 'time_step': 0.025866074868810107, 'td_error': 14.031497988033214, 'init_value': -44.14533996582031, 'ave_value': -23.83473666656125} step=3420
2022-04-20 16:02.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.05 [info     ] CQL_20220420160123: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00033887087950232434, 'time_algorithm_update': 0.025235324575189958, 'temp_loss': 3.0854301306239345, 'temp': 0.9528921849546377, 'alpha_loss': -4.390714114172416, 'alpha': 1.311683511873435, 'critic_loss': 56.19211759065327, 'actor_loss': 27.955136828952366, 'time_step': 0.025671332203156767, 'td_error': 15.904829129041943, 'init_value': -48.380409240722656, 'ave_value': -26.038950881252553} step=3762
2022-04-20 16:03.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.14 [info     ] CQL_20220420160123: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003420602508455689, 'time_algorithm_update': 0.025634559971547267, 'temp_loss': 2.9461235163504615, 'temp': 0.9487320782496915, 'alpha_loss': -3.751893377443503, 'alpha': 1.3391769057825993, 'critic_loss': 61.11899461801986, 'actor_loss': 30.194550436142592, 'time_step': 0.026069662027191697, 'td_error': 18.06820065849582, 'init_value': -51.836631774902344, 'ave_value': -27.487039754530333} step=4104
2022-04-20 16:03.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.23 [info     ] CQL_20220420160123: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003365773206565812, 'time_algorithm_update': 0.02521814449488768, 'temp_loss': 2.836843227085314, 'temp': 0.9445953163487172, 'alpha_loss': -3.1881030081221233, 'alpha': 1.3662164549381413, 'critic_loss': 66.01310595016034, 'actor_loss': 32.274367683812194, 'time_step': 0.025649276393198826, 'td_error': 20.258970010676556, 'init_value': -54.8915901184082, 'ave_value': -28.65881605347266} step=4446
2022-04-20 16:03.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.33 [info     ] CQL_20220420160123: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003439606281748989, 'time_algorithm_update': 0.025548835246883637, 'temp_loss': 2.742530361253616, 'temp': 0.9404735056280392, 'alpha_loss': -2.665115873721477, 'alpha': 1.3919123327522946, 'critic_loss': 70.83266006157412, 'actor_loss': 34.217673357467206, 'time_step': 0.025989467637580737, 'td_error': 22.48639910980165, 'init_value': -58.81232833862305, 'ave_value': -31.042265363288625} step=4788
2022-04-20 16:03.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.42 [info     ] CQL_20220420160123: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003407419773570278, 'time_algorithm_update': 0.02515141657221387, 'temp_loss': 2.634549303361547, 'temp': 0.9363456893036937, 'alpha_loss': -2.1217398899307813, 'alpha': 1.4167265038044132, 'critic_loss': 75.80831158509729, 'actor_loss': 36.08206128795245, 'time_step': 0.0255886901888931, 'td_error': 25.107698374496284, 'init_value': -61.9161491394043, 'ave_value': -32.19817344449461} step=5130
2022-04-20 16:03.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:03.51 [info     ] CQL_20220420160123: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003363333250346937, 'time_algorithm_update': 0.025481578899405853, 'temp_loss': 2.527635643356725, 'temp': 0.9322576684909955, 'alpha_loss': -1.5717332363823069, 'alpha': 1.4378861432884171, 'critic_loss': 81.07280490272923, 'actor_loss': 37.79894145608645, 'time_step': 0.025910919869852344, 'td_error': 26.7308842927567, 'init_value': -64.71932220458984, 'ave_value': -33.46007537127883} step=5472
2022-04-20 16:03.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.00 [info     ] CQL_20220420160123: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00034355210979082433, 'time_algorithm_update': 0.024895092200117503, 'temp_loss': 2.4445128580283004, 'temp': 0.9281754291545578, 'alpha_loss': -1.078479363250802, 'alpha': 1.455895710061168, 'critic_loss': 86.38969510619404, 'actor_loss': 39.409571413408244, 'time_step': 0.025332724838926082, 'td_error': 28.943857070436326, 'init_value': -67.96208190917969, 'ave_value': -34.6993919741255} step=5814
2022-04-20 16:04.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.09 [info     ] CQL_20220420160123: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00034012571412917467, 'time_algorithm_update': 0.02373922219750477, 'temp_loss': 2.3808296511047766, 'temp': 0.9240894134630236, 'alpha_loss': -0.6053769910719643, 'alpha': 1.4689320074884515, 'critic_loss': 91.08992035625971, 'actor_loss': 40.947361003585726, 'time_step': 0.024178568382709348, 'td_error': 31.066923097982244, 'init_value': -71.16748046875, 'ave_value': -36.36343823781355} step=6156
2022-04-20 16:04.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.18 [info     ] CQL_20220420160123: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003407280347500628, 'time_algorithm_update': 0.023653928996526706, 'temp_loss': 2.2930688739520066, 'temp': 0.9200090213477263, 'alpha_loss': -0.14849595836913812, 'alpha': 1.4758608362130952, 'critic_loss': 95.50026685034322, 'actor_loss': 42.44329082756712, 'time_step': 0.02409041694730346, 'td_error': 33.222596399314256, 'init_value': -73.85972595214844, 'ave_value': -37.7234672437474} step=6498
2022-04-20 16:04.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.26 [info     ] CQL_20220420160123: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003361499797531039, 'time_algorithm_update': 0.023743669889126604, 'temp_loss': 2.2092354653174415, 'temp': 0.9159303504472588, 'alpha_loss': 0.31373220892304454, 'alpha': 1.474977543828083, 'critic_loss': 100.34338200440881, 'actor_loss': 43.84817227146082, 'time_step': 0.024176624783298427, 'td_error': 34.93864547847842, 'init_value': -75.9141845703125, 'ave_value': -38.45453630458798} step=6840
2022-04-20 16:04.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.35 [info     ] CQL_20220420160123: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00034068272127742654, 'time_algorithm_update': 0.023981095057481912, 'temp_loss': 2.1403516283509325, 'temp': 0.9118900699922216, 'alpha_loss': 0.7023198585194802, 'alpha': 1.4632030519825674, 'critic_loss': 104.83975627966095, 'actor_loss': 45.14917306732713, 'time_step': 0.024417120113707426, 'td_error': 36.956215309834, 'init_value': -78.28169250488281, 'ave_value': -39.34769920007334} step=7182
2022-04-20 16:04.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.44 [info     ] CQL_20220420160123: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00033684571584065753, 'time_algorithm_update': 0.023857948375724213, 'temp_loss': 2.0819879352000723, 'temp': 0.9078421719938691, 'alpha_loss': 1.036849469736175, 'alpha': 1.4400838488026668, 'critic_loss': 108.90978577820181, 'actor_loss': 46.40323373448779, 'time_step': 0.02428998375496669, 'td_error': 38.737534741491054, 'init_value': -81.19862365722656, 'ave_value': -40.80295671480599} step=7524
2022-04-20 16:04.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:04.52 [info     ] CQL_20220420160123: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00033545982070833617, 'time_algorithm_update': 0.0234603874864634, 'temp_loss': 2.0222699157675805, 'temp': 0.9037869569153814, 'alpha_loss': 1.4448837883530827, 'alpha': 1.4076241965879475, 'critic_loss': 113.30803066945215, 'actor_loss': 47.617030829714054, 'time_step': 0.02389080482616759, 'td_error': 40.7224220347692, 'init_value': -83.38140869140625, 'ave_value': -41.706816421290775} step=7866
2022-04-20 16:04.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.01 [info     ] CQL_20220420160123: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003386903227421275, 'time_algorithm_update': 0.023915090058979235, 'temp_loss': 1.936126717001374, 'temp': 0.8997689279896474, 'alpha_loss': 1.8038418685897575, 'alpha': 1.3664108765752692, 'critic_loss': 117.98004467166655, 'actor_loss': 48.7884768881993, 'time_step': 0.024348914274695325, 'td_error': 41.72163393522774, 'init_value': -85.33296203613281, 'ave_value': -42.7876755831988} step=8208
2022-04-20 16:05.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.10 [info     ] CQL_20220420160123: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00033371211492527297, 'time_algorithm_update': 0.023834708141304596, 'temp_loss': 1.8867492118076972, 'temp': 0.8957651398683849, 'alpha_loss': 2.0703471722462545, 'alpha': 1.320951581349847, 'critic_loss': 123.06597670616462, 'actor_loss': 49.8590937497323, 'time_step': 0.02426199676000584, 'td_error': 43.38181144758092, 'init_value': -87.44331359863281, 'ave_value': -43.49650884307049} step=8550
2022-04-20 16:05.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.18 [info     ] CQL_20220420160123: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003171022872478641, 'time_algorithm_update': 0.02228209777185094, 'temp_loss': 1.8533478961353413, 'temp': 0.8917563329314628, 'alpha_loss': 2.2434980344157993, 'alpha': 1.273864786178745, 'critic_loss': 127.46961056001005, 'actor_loss': 50.95625046400996, 'time_step': 0.022684075678998265, 'td_error': 44.946336245292386, 'init_value': -89.69529724121094, 'ave_value': -44.290487324488055} step=8892
2022-04-20 16:05.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.27 [info     ] CQL_20220420160123: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003412125403420967, 'time_algorithm_update': 0.02396948226014076, 'temp_loss': 1.806722118492015, 'temp': 0.8877191688233649, 'alpha_loss': 2.382583697549781, 'alpha': 1.2294255407930117, 'critic_loss': 131.7914419787669, 'actor_loss': 52.02958199573539, 'time_step': 0.024405577726531447, 'td_error': 46.52994073589833, 'init_value': -91.44493103027344, 'ave_value': -45.52290159313126} step=9234
2022-04-20 16:05.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.35 [info     ] CQL_20220420160123: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003386373408356605, 'time_algorithm_update': 0.024004928549827886, 'temp_loss': 1.7694080210568612, 'temp': 0.8836761805397725, 'alpha_loss': 2.480476394557116, 'alpha': 1.1866733421359146, 'critic_loss': 136.5521407991822, 'actor_loss': 52.969961768702454, 'time_step': 0.02443935787468626, 'td_error': 47.96240443080251, 'init_value': -92.93093872070312, 'ave_value': -45.86726415413189} step=9576
2022-04-20 16:05.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.44 [info     ] CQL_20220420160123: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000334882596779985, 'time_algorithm_update': 0.0240234701257003, 'temp_loss': 1.7047422388840836, 'temp': 0.8796760919149856, 'alpha_loss': 2.5848007676632783, 'alpha': 1.1447362345561647, 'critic_loss': 140.97738138834634, 'actor_loss': 53.91813058462756, 'time_step': 0.02444884303020455, 'td_error': 48.84998672791922, 'init_value': -95.28306579589844, 'ave_value': -47.38651163202014} step=9918
2022-04-20 16:05.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:05.53 [info     ] CQL_20220420160123: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00033900194000779536, 'time_algorithm_update': 0.02423323176757634, 'temp_loss': 1.6628017237311916, 'temp': 0.8757060574509247, 'alpha_loss': 2.6510167056531238, 'alpha': 1.1052175864838718, 'critic_loss': 145.29457942226477, 'actor_loss': 54.759433735183805, 'time_step': 0.024666515707272536, 'td_error': 50.010880814766246, 'init_value': -96.72149658203125, 'ave_value': -47.72431787041662} step=10260
2022-04-20 16:05.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.02 [info     ] CQL_20220420160123: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034040874905056425, 'time_algorithm_update': 0.024067652852911698, 'temp_loss': 1.632825546097337, 'temp': 0.8717122924955267, 'alpha_loss': 2.705882676868912, 'alpha': 1.0681607294500919, 'critic_loss': 149.50711333960817, 'actor_loss': 55.59884809750562, 'time_step': 0.024499359186629804, 'td_error': 50.784404295067084, 'init_value': -97.64364624023438, 'ave_value': -48.45428071200176} step=10602
2022-04-20 16:06.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.10 [info     ] CQL_20220420160123: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00033815144098293015, 'time_algorithm_update': 0.02395990856906824, 'temp_loss': 1.6154687422060827, 'temp': 0.8677346608792132, 'alpha_loss': 2.745278796780179, 'alpha': 1.0316439936732689, 'critic_loss': 153.43761350397477, 'actor_loss': 56.41213817484895, 'time_step': 0.024392203977930616, 'td_error': 51.67820808634936, 'init_value': -100.22279357910156, 'ave_value': -49.878104697459186} step=10944
2022-04-20 16:06.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.19 [info     ] CQL_20220420160123: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003480918226186295, 'time_algorithm_update': 0.023884029416312947, 'temp_loss': 1.5641723881688034, 'temp': 0.8637155644726335, 'alpha_loss': 2.8085018430362667, 'alpha': 0.9969615063123536, 'critic_loss': 157.85200850726568, 'actor_loss': 57.14217471518712, 'time_step': 0.02432933048895228, 'td_error': 52.75633605151388, 'init_value': -99.90044403076172, 'ave_value': -49.64222590035952} step=11286
2022-04-20 16:06.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.28 [info     ] CQL_20220420160123: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003408855862087674, 'time_algorithm_update': 0.024144765926383392, 'temp_loss': 1.5296013763076381, 'temp': 0.8597628331323813, 'alpha_loss': 2.8395209950313234, 'alpha': 0.9634385014835157, 'critic_loss': 161.2633886950755, 'actor_loss': 57.84128967642087, 'time_step': 0.024579541725024842, 'td_error': 53.59609326513197, 'init_value': -101.212890625, 'ave_value': -50.313039738836586} step=11628
2022-04-20 16:06.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.37 [info     ] CQL_20220420160123: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00034111773061473467, 'time_algorithm_update': 0.02538101157249763, 'temp_loss': 1.5020092314446878, 'temp': 0.8558266433010324, 'alpha_loss': 2.822889439718068, 'alpha': 0.9311124403574313, 'critic_loss': 165.02247142234043, 'actor_loss': 58.48938264902572, 'time_step': 0.025813378785785875, 'td_error': 54.09115680152814, 'init_value': -102.69721984863281, 'ave_value': -51.201954928172064} step=11970
2022-04-20 16:06.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.47 [info     ] CQL_20220420160123: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00034405543790226096, 'time_algorithm_update': 0.025355020461723818, 'temp_loss': 1.476719065367827, 'temp': 0.8518626388402013, 'alpha_loss': 2.8333228754195554, 'alpha': 0.9007848558718698, 'critic_loss': 167.87897527148152, 'actor_loss': 59.125812463593064, 'time_step': 0.025791189126801072, 'td_error': 54.689336888693056, 'init_value': -103.6506118774414, 'ave_value': -51.61106929388535} step=12312
2022-04-20 16:06.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:06.56 [info     ] CQL_20220420160123: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.000343428717719184, 'time_algorithm_update': 0.02548724238635504, 'temp_loss': 1.4505864036710638, 'temp': 0.8479256863482514, 'alpha_loss': 2.827229379823333, 'alpha': 0.8710894187291464, 'critic_loss': 171.28046983863877, 'actor_loss': 59.75774805169357, 'time_step': 0.02592584194495664, 'td_error': 55.9228861574416, 'init_value': -104.64845275878906, 'ave_value': -52.082024190184235} step=12654
2022-04-20 16:06.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.05 [info     ] CQL_20220420160123: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003407377945749383, 'time_algorithm_update': 0.02576014730665419, 'temp_loss': 1.4151140606542776, 'temp': 0.8439842818076151, 'alpha_loss': 2.8164833707419055, 'alpha': 0.8428224726030004, 'critic_loss': 174.33927199157358, 'actor_loss': 60.37134722659462, 'time_step': 0.0261964728260598, 'td_error': 56.24863238676447, 'init_value': -105.69111633300781, 'ave_value': -52.78570148372704} step=12996
2022-04-20 16:07.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.15 [info     ] CQL_20220420160123: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00034524613653707224, 'time_algorithm_update': 0.025440642708226255, 'temp_loss': 1.3966848418029427, 'temp': 0.8400714888907316, 'alpha_loss': 2.7902327423904376, 'alpha': 0.8158542369192804, 'critic_loss': 177.3313297472502, 'actor_loss': 60.91675937942594, 'time_step': 0.02588358817741885, 'td_error': 56.60705299417875, 'init_value': -105.87068939208984, 'ave_value': -52.86165540818561} step=13338
2022-04-20 16:07.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.24 [info     ] CQL_20220420160123: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003434273234584875, 'time_algorithm_update': 0.0257232307690626, 'temp_loss': 1.3618969493790676, 'temp': 0.836177839173211, 'alpha_loss': 2.7960089996718525, 'alpha': 0.7894857750650037, 'critic_loss': 179.59733586004603, 'actor_loss': 61.4743509348373, 'time_step': 0.026159837231998554, 'td_error': 57.07892683623603, 'init_value': -107.06571960449219, 'ave_value': -53.593776487375166} step=13680
2022-04-20 16:07.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.33 [info     ] CQL_20220420160123: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00034317077949033145, 'time_algorithm_update': 0.02559477474257263, 'temp_loss': 1.3507987743581247, 'temp': 0.8323014115380962, 'alpha_loss': 2.731991235972845, 'alpha': 0.7639813010107007, 'critic_loss': 182.14925090053626, 'actor_loss': 62.02160887690315, 'time_step': 0.026032817991156327, 'td_error': 57.76652140913393, 'init_value': -108.62113189697266, 'ave_value': -54.53678290224022} step=14022
2022-04-20 16:07.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.42 [info     ] CQL_20220420160123: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003416036304674651, 'time_algorithm_update': 0.02538767265297516, 'temp_loss': 1.3260446767709408, 'temp': 0.8283942754854235, 'alpha_loss': 2.70681891873566, 'alpha': 0.7399158594552536, 'critic_loss': 184.35621138901737, 'actor_loss': 62.599843309636704, 'time_step': 0.025826579646060343, 'td_error': 58.219616097465384, 'init_value': -108.2115707397461, 'ave_value': -54.103281706109776} step=14364
2022-04-20 16:07.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:07.52 [info     ] CQL_20220420160123: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003484738500494706, 'time_algorithm_update': 0.025626931971276714, 'temp_loss': 1.3046984217668836, 'temp': 0.8245502128238567, 'alpha_loss': 2.6676130934416897, 'alpha': 0.7163220814794128, 'critic_loss': 186.78344329477054, 'actor_loss': 63.10737993563825, 'time_step': 0.026069118265520063, 'td_error': 58.2954473266334, 'init_value': -109.1479263305664, 'ave_value': -54.89505531401516} step=14706
2022-04-20 16:07.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.01 [info     ] CQL_20220420160123: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000338504886069493, 'time_algorithm_update': 0.025342200234619496, 'temp_loss': 1.2903358070125357, 'temp': 0.8206744619280274, 'alpha_loss': 2.6176898439376677, 'alpha': 0.6939443682717998, 'critic_loss': 188.81413157502112, 'actor_loss': 63.62398073966043, 'time_step': 0.02577497596629182, 'td_error': 58.88531903949631, 'init_value': -109.58357238769531, 'ave_value': -55.413478438911945} step=15048
2022-04-20 16:08.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.10 [info     ] CQL_20220420160123: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033901100270232264, 'time_algorithm_update': 0.025742635392306142, 'temp_loss': 1.2761782229992382, 'temp': 0.8168001438093464, 'alpha_loss': 2.562489814054199, 'alpha': 0.672422662813064, 'critic_loss': 189.96058177390293, 'actor_loss': 64.08713150024414, 'time_step': 0.026179793982477915, 'td_error': 58.51245907421845, 'init_value': -110.1116943359375, 'ave_value': -55.54266370368433} step=15390
2022-04-20 16:08.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.19 [info     ] CQL_20220420160123: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003298869607044242, 'time_algorithm_update': 0.024490718953093592, 'temp_loss': 1.26886780516446, 'temp': 0.8129304467934614, 'alpha_loss': 2.5034057253982587, 'alpha': 0.6515683507361607, 'critic_loss': 191.6235730355246, 'actor_loss': 64.56928142748382, 'time_step': 0.024911557024682476, 'td_error': 59.22048527746434, 'init_value': -111.18244934082031, 'ave_value': -56.47199433299604} step=15732
2022-04-20 16:08.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.29 [info     ] CQL_20220420160123: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00034691436946043494, 'time_algorithm_update': 0.02585132149925009, 'temp_loss': 1.2506093185896066, 'temp': 0.8090772353417692, 'alpha_loss': 2.451741893563354, 'alpha': 0.6315309450640316, 'critic_loss': 193.64336439880014, 'actor_loss': 65.01705987272207, 'time_step': 0.02629501638356705, 'td_error': 59.49025848856028, 'init_value': -112.076171875, 'ave_value': -56.929244995404474} step=16074
2022-04-20 16:08.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.38 [info     ] CQL_20220420160123: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003424611007958128, 'time_algorithm_update': 0.025379295934710586, 'temp_loss': 1.2410780982664453, 'temp': 0.8052434602327514, 'alpha_loss': 2.3833161720145517, 'alpha': 0.6120741810017859, 'critic_loss': 194.2429593627216, 'actor_loss': 65.4291853765298, 'time_step': 0.025820943347194737, 'td_error': 58.7311091253801, 'init_value': -112.4711685180664, 'ave_value': -57.152253735198364} step=16416
2022-04-20 16:08.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.47 [info     ] CQL_20220420160123: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003482472826862893, 'time_algorithm_update': 0.025798350049738298, 'temp_loss': 1.253809212244045, 'temp': 0.8013677560446555, 'alpha_loss': 2.3118893179105737, 'alpha': 0.5935011204571752, 'critic_loss': 194.9105412444176, 'actor_loss': 65.9032195799532, 'time_step': 0.026244720520331846, 'td_error': 59.66300594507234, 'init_value': -113.05024719238281, 'ave_value': -57.89812328358894} step=16758
2022-04-20 16:08.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:08.57 [info     ] CQL_20220420160123: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00034610569825646473, 'time_algorithm_update': 0.025533975913510684, 'temp_loss': 1.2294498687250572, 'temp': 0.7974931920132442, 'alpha_loss': 2.231751546350836, 'alpha': 0.5756503301754332, 'critic_loss': 196.635968080041, 'actor_loss': 66.33578763370626, 'time_step': 0.025977067780076413, 'td_error': 60.1737202802252, 'init_value': -113.64918518066406, 'ave_value': -58.62837810255386} step=17100
2022-04-20 16:08.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420160123/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:08.58 [info     ] FQE_20220420160857: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001583085002669369, 'time_algorithm_update': 0.004515531551407044, 'loss': 0.007116328875803535, 'time_step': 0.004750034895287939, 'init_value': -0.0876469761133194, 'ave_value': -0.03407860865464082, 'soft_opc': nan} step=166




2022-04-20 16:08.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:08.59 [info     ] FQE_20220420160857: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001614582107727786, 'time_algorithm_update': 0.004045948924788509, 'loss': 0.005956531186722757, 'time_step': 0.004280091768287751, 'init_value': -0.2663358747959137, 'ave_value': -0.1552718020463782, 'soft_opc': nan} step=332




2022-04-20 16:08.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:08.59 [info     ] FQE_20220420160857: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015496633139001318, 'time_algorithm_update': 0.004449318690472339, 'loss': 0.005611732997651021, 'time_step': 0.004671809184982116, 'init_value': -0.3541930317878723, 'ave_value': -0.20101077546877366, 'soft_opc': nan} step=498




2022-04-20 16:08.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.00 [info     ] FQE_20220420160857: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016006647822368577, 'time_algorithm_update': 0.004565082400678152, 'loss': 0.005655418290386358, 'time_step': 0.00479604824479804, 'init_value': -0.4885537326335907, 'ave_value': -0.2903966812928719, 'soft_opc': nan} step=664




2022-04-20 16:09.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.01 [info     ] FQE_20220420160857: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015860724161906415, 'time_algorithm_update': 0.004464363477316247, 'loss': 0.005415341987959053, 'time_step': 0.004696808665631765, 'init_value': -0.5427619218826294, 'ave_value': -0.3175407173481208, 'soft_opc': nan} step=830




2022-04-20 16:09.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.02 [info     ] FQE_20220420160857: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015448662171880883, 'time_algorithm_update': 0.003644653113491564, 'loss': 0.0054356056105637224, 'time_step': 0.003870352205023708, 'init_value': -0.6240552663803101, 'ave_value': -0.3783810040200347, 'soft_opc': nan} step=996




2022-04-20 16:09.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.03 [info     ] FQE_20220420160857: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001568047397107963, 'time_algorithm_update': 0.004540276814656085, 'loss': 0.00527084794832416, 'time_step': 0.004769220409623112, 'init_value': -0.6620388627052307, 'ave_value': -0.3928485399632304, 'soft_opc': nan} step=1162




2022-04-20 16:09.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.04 [info     ] FQE_20220420160857: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016187759767095726, 'time_algorithm_update': 0.004581939743225833, 'loss': 0.005108397494698594, 'time_step': 0.0048188157828457385, 'init_value': -0.7275447845458984, 'ave_value': -0.4482430477953843, 'soft_opc': nan} step=1328




2022-04-20 16:09.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.04 [info     ] FQE_20220420160857: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016011100217520473, 'time_algorithm_update': 0.003940902560590261, 'loss': 0.004882811506963279, 'time_step': 0.004174854381974921, 'init_value': -0.7371796369552612, 'ave_value': -0.44567707203462853, 'soft_opc': nan} step=1494




2022-04-20 16:09.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.05 [info     ] FQE_20220420160857: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001587623573211302, 'time_algorithm_update': 0.004369299095797251, 'loss': 0.004934963118705719, 'time_step': 0.004600405693054199, 'init_value': -0.8177742958068848, 'ave_value': -0.5163700722342541, 'soft_opc': nan} step=1660




2022-04-20 16:09.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.06 [info     ] FQE_20220420160857: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016453467219708915, 'time_algorithm_update': 0.0045226869812930925, 'loss': 0.004599093058404614, 'time_step': 0.004758796059941671, 'init_value': -0.8592890501022339, 'ave_value': -0.5459109702248227, 'soft_opc': nan} step=1826




2022-04-20 16:09.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.07 [info     ] FQE_20220420160857: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016006935073668697, 'time_algorithm_update': 0.004551032939589167, 'loss': 0.004734967694286795, 'time_step': 0.004782515836049275, 'init_value': -0.9030000567436218, 'ave_value': -0.5702702478648306, 'soft_opc': nan} step=1992




2022-04-20 16:09.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.08 [info     ] FQE_20220420160857: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015855553638504212, 'time_algorithm_update': 0.0035625997796116106, 'loss': 0.004850575922713447, 'time_step': 0.0037919383451163052, 'init_value': -0.9849746823310852, 'ave_value': -0.6455541645948616, 'soft_opc': nan} step=2158




2022-04-20 16:09.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.08 [info     ] FQE_20220420160857: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016333108924957644, 'time_algorithm_update': 0.004548578377229622, 'loss': 0.004882758032356624, 'time_step': 0.004789106817130583, 'init_value': -1.072167158126831, 'ave_value': -0.7221309786511434, 'soft_opc': nan} step=2324




2022-04-20 16:09.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.09 [info     ] FQE_20220420160857: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015927510089184865, 'time_algorithm_update': 0.0045762263148663995, 'loss': 0.005020285868899426, 'time_step': 0.004807674741170493, 'init_value': -1.0967519283294678, 'ave_value': -0.745541361721887, 'soft_opc': nan} step=2490




2022-04-20 16:09.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.10 [info     ] FQE_20220420160857: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015926073832684253, 'time_algorithm_update': 0.004040071763188006, 'loss': 0.005397050713710712, 'time_step': 0.004275402390813253, 'init_value': -1.1919368505477905, 'ave_value': -0.8265160096238728, 'soft_opc': nan} step=2656




2022-04-20 16:09.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.11 [info     ] FQE_20220420160857: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001579350735767778, 'time_algorithm_update': 0.004275166844747153, 'loss': 0.005711529452314444, 'time_step': 0.004502411348273955, 'init_value': -1.2823154926300049, 'ave_value': -0.9044757584810492, 'soft_opc': nan} step=2822




2022-04-20 16:09.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.12 [info     ] FQE_20220420160857: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015952788203595634, 'time_algorithm_update': 0.004579953400485487, 'loss': 0.005752638743977141, 'time_step': 0.004815246685441718, 'init_value': -1.3227466344833374, 'ave_value': -0.9273319159936462, 'soft_opc': nan} step=2988




2022-04-20 16:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.13 [info     ] FQE_20220420160857: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016358961541968655, 'time_algorithm_update': 0.004514462976570589, 'loss': 0.006279546219058873, 'time_step': 0.004751889102430229, 'init_value': -1.4843716621398926, 'ave_value': -1.0677732532818776, 'soft_opc': nan} step=3154




2022-04-20 16:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.13 [info     ] FQE_20220420160857: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015962841999099916, 'time_algorithm_update': 0.0037132415426782816, 'loss': 0.00660650764384408, 'time_step': 0.003943559635116394, 'init_value': -1.5106651782989502, 'ave_value': -1.0973892482836638, 'soft_opc': nan} step=3320




2022-04-20 16:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.14 [info     ] FQE_20220420160857: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016195659177849092, 'time_algorithm_update': 0.004538299089454743, 'loss': 0.006972023394275793, 'time_step': 0.004775139222662133, 'init_value': -1.5960705280303955, 'ave_value': -1.1760318861405894, 'soft_opc': nan} step=3486




2022-04-20 16:09.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.15 [info     ] FQE_20220420160857: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016163487032235386, 'time_algorithm_update': 0.004532666091459343, 'loss': 0.007709280205525862, 'time_step': 0.0047687837876469255, 'init_value': -1.660179615020752, 'ave_value': -1.193054405872517, 'soft_opc': nan} step=3652




2022-04-20 16:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.16 [info     ] FQE_20220420160857: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015890885548419263, 'time_algorithm_update': 0.00413271461624697, 'loss': 0.00827161731627624, 'time_step': 0.004364984581269413, 'init_value': -1.8344799280166626, 'ave_value': -1.3724698267356903, 'soft_opc': nan} step=3818




2022-04-20 16:09.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.17 [info     ] FQE_20220420160857: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016070561236645802, 'time_algorithm_update': 0.0043003243136118694, 'loss': 0.009250457624411098, 'time_step': 0.004530763051596032, 'init_value': -1.862595796585083, 'ave_value': -1.4007405216553928, 'soft_opc': nan} step=3984




2022-04-20 16:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.18 [info     ] FQE_20220420160857: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016181440238493034, 'time_algorithm_update': 0.004543225449251841, 'loss': 0.009260695347007006, 'time_step': 0.004777758954519249, 'init_value': -1.9354021549224854, 'ave_value': -1.4531761111758716, 'soft_opc': nan} step=4150




2022-04-20 16:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.18 [info     ] FQE_20220420160857: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.000160557677946895, 'time_algorithm_update': 0.004551452326487346, 'loss': 0.009687719087492316, 'time_step': 0.004788388688880277, 'init_value': -2.033743143081665, 'ave_value': -1.506774487672793, 'soft_opc': nan} step=4316




2022-04-20 16:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.19 [info     ] FQE_20220420160857: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016458781368761178, 'time_algorithm_update': 0.003698202500860375, 'loss': 0.010411007229536098, 'time_step': 0.003934988056320742, 'init_value': -2.0843124389648438, 'ave_value': -1.5387088354450424, 'soft_opc': nan} step=4482




2022-04-20 16:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.20 [info     ] FQE_20220420160857: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016159178262733552, 'time_algorithm_update': 0.00461383469133492, 'loss': 0.011459406273837864, 'time_step': 0.004849953823779003, 'init_value': -2.1959986686706543, 'ave_value': -1.5928395732419214, 'soft_opc': nan} step=4648




2022-04-20 16:09.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.21 [info     ] FQE_20220420160857: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001601440360747188, 'time_algorithm_update': 0.00459233249526426, 'loss': 0.011947025312110782, 'time_step': 0.004824675709368235, 'init_value': -2.3181257247924805, 'ave_value': -1.700399683422602, 'soft_opc': nan} step=4814




2022-04-20 16:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.22 [info     ] FQE_20220420160857: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015781011926122458, 'time_algorithm_update': 0.004139282617224268, 'loss': 0.012901905013096845, 'time_step': 0.004366423710283026, 'init_value': -2.4291388988494873, 'ave_value': -1.7867505465785192, 'soft_opc': nan} step=4980




2022-04-20 16:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.22 [info     ] FQE_20220420160857: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001529412097241505, 'time_algorithm_update': 0.002900616232171116, 'loss': 0.013802807135030582, 'time_step': 0.003125768109976527, 'init_value': -2.599738121032715, 'ave_value': -1.8980616667945576, 'soft_opc': nan} step=5146




2022-04-20 16:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.23 [info     ] FQE_20220420160857: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015521911253412086, 'time_algorithm_update': 0.003359031964497394, 'loss': 0.014871599974604436, 'time_step': 0.0035853515188378022, 'init_value': -2.7288949489593506, 'ave_value': -2.02660932728962, 'soft_opc': nan} step=5312




2022-04-20 16:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.24 [info     ] FQE_20220420160857: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001550194728805358, 'time_algorithm_update': 0.003417039492044104, 'loss': 0.015678223064000976, 'time_step': 0.003639238426484257, 'init_value': -2.7687315940856934, 'ave_value': -2.0239430606847644, 'soft_opc': nan} step=5478




2022-04-20 16:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.24 [info     ] FQE_20220420160857: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015518033360860435, 'time_algorithm_update': 0.003529150801968862, 'loss': 0.016418343785089856, 'time_step': 0.003756740007055811, 'init_value': -2.90248966217041, 'ave_value': -2.121041334598317, 'soft_opc': nan} step=5644




2022-04-20 16:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.25 [info     ] FQE_20220420160857: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001556973859488246, 'time_algorithm_update': 0.003417802144245929, 'loss': 0.017830771084100635, 'time_step': 0.0036422186587230267, 'init_value': -3.1250414848327637, 'ave_value': -2.330552252236049, 'soft_opc': nan} step=5810




2022-04-20 16:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.26 [info     ] FQE_20220420160857: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001548255782529532, 'time_algorithm_update': 0.003396298511918769, 'loss': 0.018862955601106072, 'time_step': 0.003623488437698548, 'init_value': -3.098968029022217, 'ave_value': -2.225509658918154, 'soft_opc': nan} step=5976




2022-04-20 16:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.26 [info     ] FQE_20220420160857: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015625034470156016, 'time_algorithm_update': 0.0034135077373090997, 'loss': 0.019193093181610197, 'time_step': 0.0036427572549107565, 'init_value': -3.1912317276000977, 'ave_value': -2.2800917988189973, 'soft_opc': nan} step=6142




2022-04-20 16:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.27 [info     ] FQE_20220420160857: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016064959836293417, 'time_algorithm_update': 0.0035178259194615378, 'loss': 0.020283259201026242, 'time_step': 0.003752571990691036, 'init_value': -3.2684578895568848, 'ave_value': -2.3384868583528675, 'soft_opc': nan} step=6308




2022-04-20 16:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.28 [info     ] FQE_20220420160857: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001573117382555123, 'time_algorithm_update': 0.0034315801528562984, 'loss': 0.021197654659641033, 'time_step': 0.00365924547953778, 'init_value': -3.3535571098327637, 'ave_value': -2.383782896511928, 'soft_opc': nan} step=6474




2022-04-20 16:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.28 [info     ] FQE_20220420160857: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015519325991710984, 'time_algorithm_update': 0.0035024120146969714, 'loss': 0.02219357698347913, 'time_step': 0.0037290116390549994, 'init_value': -3.4499130249023438, 'ave_value': -2.4374846153171013, 'soft_opc': nan} step=6640




2022-04-20 16:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.29 [info     ] FQE_20220420160857: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001545670520828431, 'time_algorithm_update': 0.003455073000436806, 'loss': 0.023623975751927144, 'time_step': 0.0036792826939778156, 'init_value': -3.5630478858947754, 'ave_value': -2.508275387563684, 'soft_opc': nan} step=6806




2022-04-20 16:09.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.30 [info     ] FQE_20220420160857: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001556858958968197, 'time_algorithm_update': 0.0034954634057470114, 'loss': 0.023421033675479423, 'time_step': 0.0037238123905227846, 'init_value': -3.648895263671875, 'ave_value': -2.548370978630542, 'soft_opc': nan} step=6972




2022-04-20 16:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.31 [info     ] FQE_20220420160857: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015668553042124552, 'time_algorithm_update': 0.0035313410931322948, 'loss': 0.02594358781885625, 'time_step': 0.0037612757050847433, 'init_value': -3.7797582149505615, 'ave_value': -2.60841361622217, 'soft_opc': nan} step=7138




2022-04-20 16:09.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.31 [info     ] FQE_20220420160857: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001614697008247835, 'time_algorithm_update': 0.003398026328489005, 'loss': 0.026704077608182084, 'time_step': 0.0036318560680711127, 'init_value': -3.9549975395202637, 'ave_value': -2.7509951191035578, 'soft_opc': nan} step=7304




2022-04-20 16:09.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.32 [info     ] FQE_20220420160857: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015775410525770072, 'time_algorithm_update': 0.0035234316285834254, 'loss': 0.028496277602726078, 'time_step': 0.0037543673113168002, 'init_value': -4.011091709136963, 'ave_value': -2.755965738326546, 'soft_opc': nan} step=7470




2022-04-20 16:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.33 [info     ] FQE_20220420160857: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015986683857010072, 'time_algorithm_update': 0.0035281712750354446, 'loss': 0.028948325055780405, 'time_step': 0.0037611191531261765, 'init_value': -4.17473030090332, 'ave_value': -2.852532990456785, 'soft_opc': nan} step=7636




2022-04-20 16:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.33 [info     ] FQE_20220420160857: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015864745680108127, 'time_algorithm_update': 0.003521346184144537, 'loss': 0.030458590570495027, 'time_step': 0.003753222614885813, 'init_value': -4.255401611328125, 'ave_value': -2.8772232063781673, 'soft_opc': nan} step=7802




2022-04-20 16:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.34 [info     ] FQE_20220420160857: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015883560640266142, 'time_algorithm_update': 0.003609276679624994, 'loss': 0.03199012250168227, 'time_step': 0.0038436694317553416, 'init_value': -4.3471999168396, 'ave_value': -2.925936247500683, 'soft_opc': nan} step=7968




2022-04-20 16:09.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.35 [info     ] FQE_20220420160857: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001566840941647449, 'time_algorithm_update': 0.003483663122337985, 'loss': 0.03348522130904876, 'time_step': 0.0037120121071137577, 'init_value': -4.45676326751709, 'ave_value': -2.9604219867680053, 'soft_opc': nan} step=8134




2022-04-20 16:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:09.35 [info     ] FQE_20220420160857: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016097850110157426, 'time_algorithm_update': 0.0035640460899077266, 'loss': 0.03361138204780284, 'time_step': 0.003795768841203437, 'init_value': -4.557843208312988, 'ave_value': -2.9935628653575272, 'soft_opc': nan} step=8300




2022-04-20 16:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160857/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:09.36 [info     ] Directory is created at d3rlpy_logs/FQE_20220420160936
2022-04-20 16:09.36 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:09.36 [debug    ] Building models...
2022-04-20 16:09.36 [debug    ] Models have been built.
2022-04-20 16:09.36 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420160936/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:09.37 [info     ] FQE_20220420160936: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001611792763998342, 'time_algorithm_update': 0.0034543705541034077, 'loss': 0.024433483190964476, 'time_step': 0.0036897250386171564, 'init_value': -1.4200184345245361, 'ave_value': -1.397778149311607, 'soft_opc': nan} step=344




2022-04-20 16:09.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.38 [info     ] FQE_20220420160936: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015945282093314238, 'time_algorithm_update': 0.003462397081907405, 'loss': 0.02306374464573902, 'time_step': 0.0036927031916241314, 'init_value': -2.3578600883483887, 'ave_value': -2.3441202977219144, 'soft_opc': nan} step=688




2022-04-20 16:09.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.40 [info     ] FQE_20220420160936: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001620671083760816, 'time_algorithm_update': 0.003461296475210855, 'loss': 0.02648159213970567, 'time_step': 0.0036979615688323975, 'init_value': -3.4639029502868652, 'ave_value': -3.5011136405237084, 'soft_opc': nan} step=1032




2022-04-20 16:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.41 [info     ] FQE_20220420160936: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001620740391487299, 'time_algorithm_update': 0.0034390958242638166, 'loss': 0.02929331404481863, 'time_step': 0.003673945748528769, 'init_value': -4.202824592590332, 'ave_value': -4.302104219069352, 'soft_opc': nan} step=1376




2022-04-20 16:09.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.43 [info     ] FQE_20220420160936: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016274978948193928, 'time_algorithm_update': 0.0034595686335896336, 'loss': 0.037234449943224356, 'time_step': 0.0036969912606616352, 'init_value': -5.163654327392578, 'ave_value': -5.347082129469863, 'soft_opc': nan} step=1720




2022-04-20 16:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.44 [info     ] FQE_20220420160936: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016259523325188216, 'time_algorithm_update': 0.0034969532212545703, 'loss': 0.04497299701527714, 'time_step': 0.003733083952304929, 'init_value': -5.88343620300293, 'ave_value': -6.150222531994721, 'soft_opc': nan} step=2064




2022-04-20 16:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.45 [info     ] FQE_20220420160936: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016274077947749647, 'time_algorithm_update': 0.003518145444781281, 'loss': 0.05421831529468384, 'time_step': 0.003756145405214886, 'init_value': -6.918133735656738, 'ave_value': -7.244940175478523, 'soft_opc': nan} step=2408




2022-04-20 16:09.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.47 [info     ] FQE_20220420160936: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017043878865796467, 'time_algorithm_update': 0.003470505392828653, 'loss': 0.06581937953515715, 'time_step': 0.0037172713945078295, 'init_value': -7.542811393737793, 'ave_value': -7.945952980018951, 'soft_opc': nan} step=2752




2022-04-20 16:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.48 [info     ] FQE_20220420160936: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001629022664802019, 'time_algorithm_update': 0.003454839767411698, 'loss': 0.07760765476607133, 'time_step': 0.003692604081575261, 'init_value': -8.095601081848145, 'ave_value': -8.624086650078361, 'soft_opc': nan} step=3096




2022-04-20 16:09.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.50 [info     ] FQE_20220420160936: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016281840413115745, 'time_algorithm_update': 0.0034179313238276994, 'loss': 0.09343485146501036, 'time_step': 0.003656980603240257, 'init_value': -8.917654037475586, 'ave_value': -9.499429806194327, 'soft_opc': nan} step=3440




2022-04-20 16:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.51 [info     ] FQE_20220420160936: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001592878685441128, 'time_algorithm_update': 0.003487233505692593, 'loss': 0.10732382846194817, 'time_step': 0.0037167827750361243, 'init_value': -9.504354476928711, 'ave_value': -10.17407951161668, 'soft_opc': nan} step=3784




2022-04-20 16:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.52 [info     ] FQE_20220420160936: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001601133235665255, 'time_algorithm_update': 0.003403904826142067, 'loss': 0.12898625386608115, 'time_step': 0.0036360157090564106, 'init_value': -9.938797950744629, 'ave_value': -10.733750692721422, 'soft_opc': nan} step=4128




2022-04-20 16:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.54 [info     ] FQE_20220420160936: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016164086585821106, 'time_algorithm_update': 0.0034312709819438844, 'loss': 0.14734669054078675, 'time_step': 0.003662655519884686, 'init_value': -10.457115173339844, 'ave_value': -11.314625428267965, 'soft_opc': nan} step=4472




2022-04-20 16:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.55 [info     ] FQE_20220420160936: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015984233035597691, 'time_algorithm_update': 0.003441583278567292, 'loss': 0.16821388172278137, 'time_step': 0.0036742922871611837, 'init_value': -11.379417419433594, 'ave_value': -12.281522278581654, 'soft_opc': nan} step=4816




2022-04-20 16:09.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.56 [info     ] FQE_20220420160936: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016308038733726325, 'time_algorithm_update': 0.003464084031970002, 'loss': 0.19341693490440415, 'time_step': 0.003698869500049325, 'init_value': -11.473443984985352, 'ave_value': -12.482497038255941, 'soft_opc': nan} step=5160




2022-04-20 16:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.58 [info     ] FQE_20220420160936: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016416297402492788, 'time_algorithm_update': 0.002999149089635805, 'loss': 0.2115889708565678, 'time_step': 0.003237487271774647, 'init_value': -12.16582202911377, 'ave_value': -13.187159513406925, 'soft_opc': nan} step=5504




2022-04-20 16:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:09.59 [info     ] FQE_20220420160936: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016081263852673908, 'time_algorithm_update': 0.0033817450667536536, 'loss': 0.24032666766474586, 'time_step': 0.0036159545876259026, 'init_value': -12.515776634216309, 'ave_value': -13.641010672283603, 'soft_opc': nan} step=5848




2022-04-20 16:09.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.00 [info     ] FQE_20220420160936: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016334583592969318, 'time_algorithm_update': 0.0034136827601942907, 'loss': 0.2609288690633379, 'time_step': 0.0036509376625682034, 'init_value': -12.888404846191406, 'ave_value': -14.165573625897503, 'soft_opc': nan} step=6192




2022-04-20 16:10.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.02 [info     ] FQE_20220420160936: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016053055607995323, 'time_algorithm_update': 0.003488886494969213, 'loss': 0.294264092651564, 'time_step': 0.003724376129549603, 'init_value': -13.099180221557617, 'ave_value': -14.456967055529088, 'soft_opc': nan} step=6536




2022-04-20 16:10.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.03 [info     ] FQE_20220420160936: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016320167585860852, 'time_algorithm_update': 0.003456273744272631, 'loss': 0.313225319414118, 'time_step': 0.0036959100601285004, 'init_value': -13.577882766723633, 'ave_value': -15.123328084693298, 'soft_opc': nan} step=6880




2022-04-20 16:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.05 [info     ] FQE_20220420160936: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001635440560274346, 'time_algorithm_update': 0.003445931645326836, 'loss': 0.335620837735532, 'time_step': 0.003682677135911099, 'init_value': -13.703023910522461, 'ave_value': -15.460756475861007, 'soft_opc': nan} step=7224




2022-04-20 16:10.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.06 [info     ] FQE_20220420160936: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001628010771995367, 'time_algorithm_update': 0.0034677545691645423, 'loss': 0.355624380202011, 'time_step': 0.0037041867888251015, 'init_value': -13.999858856201172, 'ave_value': -15.933163561235677, 'soft_opc': nan} step=7568




2022-04-20 16:10.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.07 [info     ] FQE_20220420160936: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001607627369636713, 'time_algorithm_update': 0.003468044968538506, 'loss': 0.38757598427730766, 'time_step': 0.0037030009336249774, 'init_value': -14.264359474182129, 'ave_value': -16.32020684735732, 'soft_opc': nan} step=7912




2022-04-20 16:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.09 [info     ] FQE_20220420160936: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001639079215914704, 'time_algorithm_update': 0.003436377575231153, 'loss': 0.4076400871745982, 'time_step': 0.0036758031955985136, 'init_value': -14.524749755859375, 'ave_value': -16.827228859040115, 'soft_opc': nan} step=8256




2022-04-20 16:10.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.10 [info     ] FQE_20220420160936: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016551031622775766, 'time_algorithm_update': 0.003495926080748092, 'loss': 0.42481248796549304, 'time_step': 0.00373703726502352, 'init_value': -14.739991188049316, 'ave_value': -17.377168825564084, 'soft_opc': nan} step=8600




2022-04-20 16:10.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.12 [info     ] FQE_20220420160936: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001654077407925628, 'time_algorithm_update': 0.00350270853486172, 'loss': 0.4415907504383561, 'time_step': 0.003742671983186589, 'init_value': -14.953292846679688, 'ave_value': -17.763691282970413, 'soft_opc': nan} step=8944




2022-04-20 16:10.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.13 [info     ] FQE_20220420160936: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016913095185923022, 'time_algorithm_update': 0.0035270743591840877, 'loss': 0.4587662107213725, 'time_step': 0.0037710534971813823, 'init_value': -15.176231384277344, 'ave_value': -18.18877860844672, 'soft_opc': nan} step=9288




2022-04-20 16:10.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.14 [info     ] FQE_20220420160936: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016316979430442633, 'time_algorithm_update': 0.0035543330880098566, 'loss': 0.4736675882636201, 'time_step': 0.0037905760975771173, 'init_value': -15.156320571899414, 'ave_value': -18.217290409674515, 'soft_opc': nan} step=9632




2022-04-20 16:10.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.16 [info     ] FQE_20220420160936: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016442703646282818, 'time_algorithm_update': 0.003536706053933432, 'loss': 0.47697638887587157, 'time_step': 0.003777454758799353, 'init_value': -15.189469337463379, 'ave_value': -18.45840040160729, 'soft_opc': nan} step=9976




2022-04-20 16:10.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.17 [info     ] FQE_20220420160936: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016523932301720909, 'time_algorithm_update': 0.003476201794868292, 'loss': 0.5048903515399975, 'time_step': 0.003716791091963302, 'init_value': -15.734821319580078, 'ave_value': -19.143946129513218, 'soft_opc': nan} step=10320




2022-04-20 16:10.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.19 [info     ] FQE_20220420160936: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016170393588931062, 'time_algorithm_update': 0.0034870879594669783, 'loss': 0.5140236492571962, 'time_step': 0.0037242936533550884, 'init_value': -15.675029754638672, 'ave_value': -19.26990876627398, 'soft_opc': nan} step=10664




2022-04-20 16:10.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.20 [info     ] FQE_20220420160936: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001633881136428478, 'time_algorithm_update': 0.0035344625628271767, 'loss': 0.5290730495200774, 'time_step': 0.0037739526393801666, 'init_value': -15.859195709228516, 'ave_value': -19.688913431844195, 'soft_opc': nan} step=11008




2022-04-20 16:10.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.21 [info     ] FQE_20220420160936: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016438961029052734, 'time_algorithm_update': 0.0035017347613046338, 'loss': 0.5381494104634797, 'time_step': 0.0037389245144156522, 'init_value': -15.739876747131348, 'ave_value': -19.72035506324725, 'soft_opc': nan} step=11352




2022-04-20 16:10.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.23 [info     ] FQE_20220420160936: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016167967818504157, 'time_algorithm_update': 0.00343222742856935, 'loss': 0.5435068244331105, 'time_step': 0.0036695495594379515, 'init_value': -15.585397720336914, 'ave_value': -19.767277368715217, 'soft_opc': nan} step=11696




2022-04-20 16:10.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.24 [info     ] FQE_20220420160936: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016271513561869777, 'time_algorithm_update': 0.003459897152213163, 'loss': 0.5381097412711486, 'time_step': 0.0036963813526685848, 'init_value': -15.184751510620117, 'ave_value': -19.598373342728294, 'soft_opc': nan} step=12040




2022-04-20 16:10.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.25 [info     ] FQE_20220420160936: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016172334205272586, 'time_algorithm_update': 0.0033788327560868373, 'loss': 0.536287798605816, 'time_step': 0.003615578939748365, 'init_value': -15.108884811401367, 'ave_value': -19.607322969294348, 'soft_opc': nan} step=12384




2022-04-20 16:10.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.27 [info     ] FQE_20220420160936: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015779913857925769, 'time_algorithm_update': 0.0034304919630982157, 'loss': 0.5536978746017138, 'time_step': 0.0036616803601730702, 'init_value': -15.296485900878906, 'ave_value': -20.148839021024404, 'soft_opc': nan} step=12728




2022-04-20 16:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.28 [info     ] FQE_20220420160936: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016405069550802542, 'time_algorithm_update': 0.0034516065619712654, 'loss': 0.5604231578438694, 'time_step': 0.0036901450434396435, 'init_value': -15.371284484863281, 'ave_value': -20.268258415149138, 'soft_opc': nan} step=13072




2022-04-20 16:10.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.30 [info     ] FQE_20220420160936: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016084174777186193, 'time_algorithm_update': 0.0035178564315618472, 'loss': 0.5664452304684578, 'time_step': 0.003753266362256782, 'init_value': -15.464548110961914, 'ave_value': -20.504071983278873, 'soft_opc': nan} step=13416




2022-04-20 16:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.31 [info     ] FQE_20220420160936: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016024154286051906, 'time_algorithm_update': 0.003437511449636415, 'loss': 0.581079920797154, 'time_step': 0.0036695287671200064, 'init_value': -15.439658164978027, 'ave_value': -20.549646482673715, 'soft_opc': nan} step=13760




2022-04-20 16:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.32 [info     ] FQE_20220420160936: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001654999200687852, 'time_algorithm_update': 0.003520014674164528, 'loss': 0.5893026280195214, 'time_step': 0.0037586910780086076, 'init_value': -15.428871154785156, 'ave_value': -20.49337517612085, 'soft_opc': nan} step=14104




2022-04-20 16:10.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.34 [info     ] FQE_20220420160936: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001633000928302144, 'time_algorithm_update': 0.003495699444482493, 'loss': 0.5928569103396216, 'time_step': 0.0037341907966968626, 'init_value': -15.273307800292969, 'ave_value': -20.51340198606454, 'soft_opc': nan} step=14448




2022-04-20 16:10.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.35 [info     ] FQE_20220420160936: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016191671060961346, 'time_algorithm_update': 0.0035415742286415987, 'loss': 0.5982769538031154, 'time_step': 0.003779729438382526, 'init_value': -15.279031753540039, 'ave_value': -20.621143996094663, 'soft_opc': nan} step=14792




2022-04-20 16:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.37 [info     ] FQE_20220420160936: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016109264174173044, 'time_algorithm_update': 0.003426676572755326, 'loss': 0.6153980060372242, 'time_step': 0.003662913344627203, 'init_value': -15.37331771850586, 'ave_value': -20.7255322774352, 'soft_opc': nan} step=15136




2022-04-20 16:10.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.38 [info     ] FQE_20220420160936: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016077590543170307, 'time_algorithm_update': 0.0035457292268442553, 'loss': 0.6150475617519818, 'time_step': 0.0037818606509718786, 'init_value': -14.998262405395508, 'ave_value': -20.473761462593064, 'soft_opc': nan} step=15480




2022-04-20 16:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.39 [info     ] FQE_20220420160936: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016372494919355526, 'time_algorithm_update': 0.0035313499528308247, 'loss': 0.6448649418713568, 'time_step': 0.0037698863550674083, 'init_value': -15.38726806640625, 'ave_value': -20.952621822976212, 'soft_opc': nan} step=15824




2022-04-20 16:10.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.41 [info     ] FQE_20220420160936: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001612735349078511, 'time_algorithm_update': 0.0034483740496080977, 'loss': 0.6716187868949546, 'time_step': 0.0036834977393926578, 'init_value': -15.275398254394531, 'ave_value': -21.199825693759227, 'soft_opc': nan} step=16168




2022-04-20 16:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.42 [info     ] FQE_20220420160936: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016457466192023698, 'time_algorithm_update': 0.0035453861535981643, 'loss': 0.6767479684799477, 'time_step': 0.003784388303756714, 'init_value': -15.413822174072266, 'ave_value': -21.093046645283163, 'soft_opc': nan} step=16512




2022-04-20 16:10.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.44 [info     ] FQE_20220420160936: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001596073771631995, 'time_algorithm_update': 0.003508252459903096, 'loss': 0.6921631254006697, 'time_step': 0.0037417065265566802, 'init_value': -15.368202209472656, 'ave_value': -21.312531982342133, 'soft_opc': nan} step=16856




2022-04-20 16:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:10.45 [info     ] FQE_20220420160936: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016360366067220999, 'time_algorithm_update': 0.0035214860771977624, 'loss': 0.7066603127087272, 'time_step': 0.0037588726642519927, 'init_value': -15.47386646270752, 'ave_value': -21.476244204656666, 'soft_opc': nan} step=17200




2022-04-20 16:10.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420160936/model_17200.pt
search iteration:  5
using hyper params:  [0.009797954756525222, 0.005714837300986957, 2.8537633896364146e-05, 7]
2022-04-20 16:10.45 [debug    ] RoundIterator is selected.
2022-04-20 16:10.45 [info     ] Directory is created at d3rlpy_logs/CQL_20220420161045
2022-04-20 16:10.45 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:10.45 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:10.45 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420161045/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009797954756525222, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:10.54 [info     ] CQL_20220420161045: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00038384972957142613, 'time_algorithm_update': 0.02396017138720953, 'temp_loss': 4.4953071869779055, 'temp': 0.9952728154017911, 'alpha_loss': -16.85532129577726, 'alpha': 1.0169689268396611, 'critic_loss': 51.556545268722445, 'actor_loss': 5.230808362626193, 'time_step': 0.024440748649731017, 'td_error': 4.147608882031168, 'init_value': -12.450334548950195, 'ave_value': -7.888846338162249} step=342
2022-04-20 16:10.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.03 [info     ] CQL_20220420161045: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00039768079568070977, 'time_algorithm_update': 0.02411322217238577, 'temp_loss': 4.040743970731546, 'temp': 0.9858798665261408, 'alpha_loss': -7.672745680948447, 'alpha': 1.0413009935652304, 'critic_loss': 27.443315294053818, 'actor_loss': 11.533299638513933, 'time_step': 0.024608050870616533, 'td_error': 5.9471739894081495, 'init_value': -22.290193557739258, 'ave_value': -13.426654878853578} step=684
2022-04-20 16:11.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.11 [info     ] CQL_20220420161045: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003952031944230286, 'time_algorithm_update': 0.024163800373412016, 'temp_loss': 3.4460161945276093, 'temp': 0.9774120223452474, 'alpha_loss': -4.354351496138768, 'alpha': 1.05826543367397, 'critic_loss': 43.87916967046191, 'actor_loss': 19.17635892566882, 'time_step': 0.02465639337461594, 'td_error': 8.609721969406774, 'init_value': -33.195762634277344, 'ave_value': -20.361442698730944} step=1026
2022-04-20 16:11.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.20 [info     ] CQL_20220420161045: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00039886522014238683, 'time_algorithm_update': 0.02429527148865817, 'temp_loss': 2.988980763836911, 'temp': 0.9695953205663558, 'alpha_loss': -1.7931064338265368, 'alpha': 1.0696146411505358, 'critic_loss': 66.2491066135161, 'actor_loss': 26.619694174381724, 'time_step': 0.024791662455999362, 'td_error': 13.02505646182336, 'init_value': -43.62983322143555, 'ave_value': -26.517403618155367} step=1368
2022-04-20 16:11.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.30 [info     ] CQL_20220420161045: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003990318342956186, 'time_algorithm_update': 0.02529168965523703, 'temp_loss': 2.622050260939793, 'temp': 0.9621913840896205, 'alpha_loss': 0.26004322482087805, 'alpha': 1.072898495267009, 'critic_loss': 93.62249240540622, 'actor_loss': 33.476230197482636, 'time_step': 0.025789653348643877, 'td_error': 17.167329611422947, 'init_value': -52.866363525390625, 'ave_value': -32.2580494127436} step=1710
2022-04-20 16:11.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.39 [info     ] CQL_20220420161045: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00040159866823787576, 'time_algorithm_update': 0.025573526209557964, 'temp_loss': 2.330862381653479, 'temp': 0.9551039078430823, 'alpha_loss': 1.9544607599576314, 'alpha': 1.0661925014696623, 'critic_loss': 120.7859795107479, 'actor_loss': 39.770300011885794, 'time_step': 0.026076804127609522, 'td_error': 21.791319166702902, 'init_value': -61.82550811767578, 'ave_value': -37.69082429139866} step=2052
2022-04-20 16:11.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.48 [info     ] CQL_20220420161045: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00038859718724300985, 'time_algorithm_update': 0.025253691171344957, 'temp_loss': 2.0379245030252555, 'temp': 0.9482880747109129, 'alpha_loss': 3.5044354119757464, 'alpha': 1.0472602394589208, 'critic_loss': 146.44960532829776, 'actor_loss': 45.46419298718547, 'time_step': 0.02573992355525145, 'td_error': 25.888875972347048, 'init_value': -68.24766540527344, 'ave_value': -41.1294352923186} step=2394
2022-04-20 16:11.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:11.58 [info     ] CQL_20220420161045: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00039904229125084235, 'time_algorithm_update': 0.025596059553804455, 'temp_loss': 1.813355807323902, 'temp': 0.9417688294112334, 'alpha_loss': 4.778311426179451, 'alpha': 1.0168916051499328, 'critic_loss': 171.29889027556482, 'actor_loss': 50.67324400784676, 'time_step': 0.026094507055672986, 'td_error': 31.612709562537958, 'init_value': -77.19548797607422, 'ave_value': -47.439865910516936} step=2736
2022-04-20 16:11.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.07 [info     ] CQL_20220420161045: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00039645036061604816, 'time_algorithm_update': 0.025382789254885667, 'temp_loss': 1.6071716811921861, 'temp': 0.9354575715566936, 'alpha_loss': 5.789789311718523, 'alpha': 0.9799713701532599, 'critic_loss': 196.52952763072233, 'actor_loss': 55.487609874435336, 'time_step': 0.02587693825102689, 'td_error': 36.780307819868334, 'init_value': -82.13822174072266, 'ave_value': -50.35411476150558} step=3078
2022-04-20 16:12.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.16 [info     ] CQL_20220420161045: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003946322446678117, 'time_algorithm_update': 0.025678497308876082, 'temp_loss': 1.4430499008873052, 'temp': 0.9292570071959356, 'alpha_loss': 6.5095410388812684, 'alpha': 0.9412592271266625, 'critic_loss': 221.2068528292472, 'actor_loss': 59.799935904162666, 'time_step': 0.02617038202564619, 'td_error': 40.931298789303476, 'init_value': -87.9457015991211, 'ave_value': -53.518098987862466} step=3420
2022-04-20 16:12.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.25 [info     ] CQL_20220420161045: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003936750846996642, 'time_algorithm_update': 0.025055597400107578, 'temp_loss': 1.2713784085728272, 'temp': 0.9232964967077936, 'alpha_loss': 7.171957586940966, 'alpha': 0.9031297059087028, 'critic_loss': 244.62487779583847, 'actor_loss': 63.92132283930193, 'time_step': 0.025545799941347355, 'td_error': 45.16442647124191, 'init_value': -94.87428283691406, 'ave_value': -57.88046116345772} step=3762
2022-04-20 16:12.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.35 [info     ] CQL_20220420161045: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00039098904146785623, 'time_algorithm_update': 0.025375412918670834, 'temp_loss': 1.1376921717534985, 'temp': 0.9174591219216063, 'alpha_loss': 7.559087953372308, 'alpha': 0.8670419635828476, 'critic_loss': 272.2354184870134, 'actor_loss': 67.74187365749427, 'time_step': 0.025865251557868823, 'td_error': 46.82177438445236, 'init_value': -97.67911529541016, 'ave_value': -60.436426632078785} step=4104
2022-04-20 16:12.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.44 [info     ] CQL_20220420161045: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00040338889897218225, 'time_algorithm_update': 0.025244631962469448, 'temp_loss': 0.990959158940622, 'temp': 0.9117122172263631, 'alpha_loss': 8.266893870649282, 'alpha': 0.8329244696954538, 'critic_loss': 295.5715447141413, 'actor_loss': 71.09517399748863, 'time_step': 0.025747891058001602, 'td_error': 57.84095089264778, 'init_value': -104.137451171875, 'ave_value': -64.0149242989974} step=4446
2022-04-20 16:12.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:12.53 [info     ] CQL_20220420161045: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003953935110081009, 'time_algorithm_update': 0.025470602582072652, 'temp_loss': 0.855974773827352, 'temp': 0.9064486641981448, 'alpha_loss': 8.733169518019023, 'alpha': 0.7997610778139349, 'critic_loss': 320.0472893073545, 'actor_loss': 74.3073044715569, 'time_step': 0.025964340968438755, 'td_error': 54.717429145565184, 'init_value': -106.5906753540039, 'ave_value': -65.79718937541652} step=4788
2022-04-20 16:12.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.02 [info     ] CQL_20220420161045: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00039349243654842263, 'time_algorithm_update': 0.025216327076069793, 'temp_loss': 0.7364661431495558, 'temp': 0.901254129340077, 'alpha_loss': 9.148610812181618, 'alpha': 0.7688208236331828, 'critic_loss': 345.5479605156079, 'actor_loss': 77.3153218498007, 'time_step': 0.025707958734523483, 'td_error': 71.25994616357607, 'init_value': -113.1331787109375, 'ave_value': -70.32826302669204} step=5130
2022-04-20 16:13.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.12 [info     ] CQL_20220420161045: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0004006143201861465, 'time_algorithm_update': 0.025503398382175736, 'temp_loss': 0.6028617159926404, 'temp': 0.8963387130993848, 'alpha_loss': 9.538906684395863, 'alpha': 0.7395113516620725, 'critic_loss': 371.7810933074059, 'actor_loss': 80.16293761047007, 'time_step': 0.026002162381222372, 'td_error': 71.27484201654917, 'init_value': -115.22807312011719, 'ave_value': -71.4909453301239} step=5472
2022-04-20 16:13.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.21 [info     ] CQL_20220420161045: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003948079215155707, 'time_algorithm_update': 0.025391775265074613, 'temp_loss': 0.4718821979187734, 'temp': 0.8920166621431272, 'alpha_loss': 10.202573313350566, 'alpha': 0.7115332442417479, 'critic_loss': 398.52887908757083, 'actor_loss': 82.93084393328394, 'time_step': 0.0258842448742069, 'td_error': 74.58835514978071, 'init_value': -117.53627014160156, 'ave_value': -72.79075670666187} step=5814
2022-04-20 16:13.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.30 [info     ] CQL_20220420161045: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00039370506130463894, 'time_algorithm_update': 0.02549407356663754, 'temp_loss': 0.3984747642640308, 'temp': 0.888218629429912, 'alpha_loss': 10.424776505308541, 'alpha': 0.6839933984460886, 'critic_loss': 427.3590314541644, 'actor_loss': 85.74764517176222, 'time_step': 0.02598611165208426, 'td_error': 70.77517923650997, 'init_value': -126.30615234375, 'ave_value': -76.90512312995489} step=6156
2022-04-20 16:13.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.40 [info     ] CQL_20220420161045: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003877787562141642, 'time_algorithm_update': 0.025243118492483396, 'temp_loss': 0.2835886092987238, 'temp': 0.8846469852659438, 'alpha_loss': 11.459193313330935, 'alpha': 0.6585693946707318, 'critic_loss': 457.90569488346927, 'actor_loss': 88.57835551033243, 'time_step': 0.025729321596915263, 'td_error': 95.17232350354544, 'init_value': -128.6676025390625, 'ave_value': -80.03392077541902} step=6498
2022-04-20 16:13.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.49 [info     ] CQL_20220420161045: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00040570616024976586, 'time_algorithm_update': 0.025792041020086635, 'temp_loss': 0.1823962549542823, 'temp': 0.8819362113350316, 'alpha_loss': 11.913234889158728, 'alpha': 0.6333929610531233, 'critic_loss': 491.4154711271587, 'actor_loss': 91.39072427136159, 'time_step': 0.02629661002354315, 'td_error': 133.07285144790075, 'init_value': -140.8517608642578, 'ave_value': -84.48348148359909} step=6840
2022-04-20 16:13.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:13.58 [info     ] CQL_20220420161045: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00039436803226582486, 'time_algorithm_update': 0.025480343584428754, 'temp_loss': 0.10102589076591863, 'temp': 0.8800472326097433, 'alpha_loss': 12.28848189498946, 'alpha': 0.6095350475339164, 'critic_loss': 530.0366188629329, 'actor_loss': 94.44376672220508, 'time_step': 0.025972607540108307, 'td_error': 133.18604255776927, 'init_value': -147.6380157470703, 'ave_value': -88.05084633609196} step=7182
2022-04-20 16:13.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.08 [info     ] CQL_20220420161045: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003933767129106131, 'time_algorithm_update': 0.025771688996699817, 'temp_loss': 0.049967937454668404, 'temp': 0.8791640653596287, 'alpha_loss': 12.078586886500755, 'alpha': 0.5871093084937647, 'critic_loss': 572.7291688974838, 'actor_loss': 97.25647514744809, 'time_step': 0.026262035843921682, 'td_error': 170.662146021464, 'init_value': -149.2075653076172, 'ave_value': -87.43647368953303} step=7524
2022-04-20 16:14.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.17 [info     ] CQL_20220420161045: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0004018893715930961, 'time_algorithm_update': 0.025432797203287047, 'temp_loss': 0.012664563741469593, 'temp': 0.8786802328469461, 'alpha_loss': 11.90693065715812, 'alpha': 0.566657843296988, 'critic_loss': 613.7421800936871, 'actor_loss': 99.80256811220046, 'time_step': 0.025933198761521725, 'td_error': 175.34540471569207, 'init_value': -154.10731506347656, 'ave_value': -89.32625863786821} step=7866
2022-04-20 16:14.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.26 [info     ] CQL_20220420161045: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0004014920072945935, 'time_algorithm_update': 0.02485975745128609, 'temp_loss': 0.019609729459846927, 'temp': 0.8784890565258717, 'alpha_loss': 12.066232256024902, 'alpha': 0.5472430226049925, 'critic_loss': 658.5823826483119, 'actor_loss': 102.60707139132316, 'time_step': 0.0253595295008163, 'td_error': 207.31599954276942, 'init_value': -162.1173553466797, 'ave_value': -94.75103659933886} step=8208
2022-04-20 16:14.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.35 [info     ] CQL_20220420161045: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00039436872939617316, 'time_algorithm_update': 0.024194018882617616, 'temp_loss': -0.04306104694280708, 'temp': 0.8783099757649048, 'alpha_loss': 12.535972565935369, 'alpha': 0.5288070959654468, 'critic_loss': 699.7460059450384, 'actor_loss': 104.93280939470257, 'time_step': 0.02468726230643646, 'td_error': 230.45379481542219, 'init_value': -168.8746795654297, 'ave_value': -96.82507503628865} step=8550
2022-04-20 16:14.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.44 [info     ] CQL_20220420161045: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003882862671076903, 'time_algorithm_update': 0.024142652924297847, 'temp_loss': -0.01102131099728813, 'temp': 0.8788419325797878, 'alpha_loss': 10.895064214516802, 'alpha': 0.5113615886858333, 'critic_loss': 744.7059440389711, 'actor_loss': 107.20655697828148, 'time_step': 0.024627944182234202, 'td_error': 281.5459186057895, 'init_value': -171.4219207763672, 'ave_value': -97.4027929553752} step=8892
2022-04-20 16:14.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:14.53 [info     ] CQL_20220420161045: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003948678747255203, 'time_algorithm_update': 0.02406079657593666, 'temp_loss': -0.07305932230158159, 'temp': 0.880417532565301, 'alpha_loss': 10.86938550597743, 'alpha': 0.49484544905305605, 'critic_loss': 781.5103370711121, 'actor_loss': 109.07527615731223, 'time_step': 0.024552294385363483, 'td_error': 186.79215400932645, 'init_value': -175.74122619628906, 'ave_value': -98.98671668218465} step=9234
2022-04-20 16:14.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.01 [info     ] CQL_20220420161045: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00039642596105385943, 'time_algorithm_update': 0.02425928352869045, 'temp_loss': -0.03366693811850589, 'temp': 0.8815045086612479, 'alpha_loss': 9.222338266539992, 'alpha': 0.4811108315374419, 'critic_loss': 809.5929167451915, 'actor_loss': 110.1822048209564, 'time_step': 0.024753350960580928, 'td_error': 194.17581012200563, 'init_value': -180.94973754882812, 'ave_value': -101.3252017965604} step=9576
2022-04-20 16:15.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.10 [info     ] CQL_20220420161045: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003933056056150916, 'time_algorithm_update': 0.02403021416468927, 'temp_loss': -0.046202100573750265, 'temp': 0.88241717777057, 'alpha_loss': 9.624024050974706, 'alpha': 0.46714068949222565, 'critic_loss': 836.2315895125183, 'actor_loss': 111.75576032672012, 'time_step': 0.024521785172802662, 'td_error': 205.54815075467215, 'init_value': -179.35784912109375, 'ave_value': -101.3816695038746} step=9918
2022-04-20 16:15.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.19 [info     ] CQL_20220420161045: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00039001793889274376, 'time_algorithm_update': 0.023752232044063813, 'temp_loss': -0.058714264421042994, 'temp': 0.8836286853977114, 'alpha_loss': 9.456278160998696, 'alpha': 0.4531520342791987, 'critic_loss': 865.6417620028668, 'actor_loss': 113.35469588341071, 'time_step': 0.024240831185502614, 'td_error': 214.1653244630277, 'init_value': -180.5632781982422, 'ave_value': -99.68353449202336} step=10260
2022-04-20 16:15.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.28 [info     ] CQL_20220420161045: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00039224805887679606, 'time_algorithm_update': 0.024149508504142537, 'temp_loss': -0.14427908242010234, 'temp': 0.8866239699006778, 'alpha_loss': 9.748032647266722, 'alpha': 0.4390695237102564, 'critic_loss': 894.3947218510143, 'actor_loss': 114.81950197721783, 'time_step': 0.0246407135188231, 'td_error': 197.91540971059587, 'init_value': -181.29237365722656, 'ave_value': -102.4157615768104} step=10602
2022-04-20 16:15.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.37 [info     ] CQL_20220420161045: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003897725490101597, 'time_algorithm_update': 0.024237022065279775, 'temp_loss': -0.10516252609416407, 'temp': 0.8895551766219892, 'alpha_loss': 9.738083264981096, 'alpha': 0.4252253163453431, 'critic_loss': 916.6162835729052, 'actor_loss': 115.77833193505717, 'time_step': 0.0247268481561315, 'td_error': 361.2235047306121, 'init_value': -189.7552947998047, 'ave_value': -104.5394168817762} step=10944
2022-04-20 16:15.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.45 [info     ] CQL_20220420161045: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003930874038160893, 'time_algorithm_update': 0.02416522112506175, 'temp_loss': -0.10702267194395526, 'temp': 0.8924947712156508, 'alpha_loss': 8.760634565911097, 'alpha': 0.41188076983766947, 'critic_loss': 939.3592316923085, 'actor_loss': 116.46453317564134, 'time_step': 0.024657476715177123, 'td_error': 226.2384545464639, 'init_value': -189.07626342773438, 'ave_value': -105.149498821599} step=11286
2022-04-20 16:15.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:15.54 [info     ] CQL_20220420161045: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003945151267693057, 'time_algorithm_update': 0.02409851202490734, 'temp_loss': -0.057418663893439616, 'temp': 0.8943563964631822, 'alpha_loss': 7.1259537063844025, 'alpha': 0.40068361938696856, 'critic_loss': 937.4747019985266, 'actor_loss': 115.63718972010919, 'time_step': 0.024592345918131153, 'td_error': 152.41266078426082, 'init_value': -186.67807006835938, 'ave_value': -105.77466002339716} step=11628
2022-04-20 16:15.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.03 [info     ] CQL_20220420161045: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003945778685006482, 'time_algorithm_update': 0.024085486841480635, 'temp_loss': -0.03106539538511407, 'temp': 0.8956604207816877, 'alpha_loss': 6.790106865397671, 'alpha': 0.39053800309959213, 'critic_loss': 924.5062039916278, 'actor_loss': 114.89502673678928, 'time_step': 0.024581499267042728, 'td_error': 171.16398702132017, 'init_value': -181.45376586914062, 'ave_value': -103.98466065691801} step=11970
2022-04-20 16:16.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.12 [info     ] CQL_20220420161045: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003928204028927095, 'time_algorithm_update': 0.024292017981322887, 'temp_loss': -0.03440527528066907, 'temp': 0.8963824231722202, 'alpha_loss': 7.4118607309129505, 'alpha': 0.37959103477977174, 'critic_loss': 920.8394637972291, 'actor_loss': 115.2018388893172, 'time_step': 0.024783275977909913, 'td_error': 170.37565390663076, 'init_value': -182.44822692871094, 'ave_value': -103.59679012107823} step=12312
2022-04-20 16:16.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.21 [info     ] CQL_20220420161045: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003912874132569073, 'time_algorithm_update': 0.024150565353750486, 'temp_loss': -0.026292673927562977, 'temp': 0.8970589039967074, 'alpha_loss': 6.375462242037232, 'alpha': 0.3689006082495751, 'critic_loss': 917.2492343835663, 'actor_loss': 114.73296610514323, 'time_step': 0.024636173108864946, 'td_error': 116.09361375875383, 'init_value': -177.21701049804688, 'ave_value': -103.24586374564885} step=12654
2022-04-20 16:16.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.29 [info     ] CQL_20220420161045: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003940766317802563, 'time_algorithm_update': 0.024037210564864308, 'temp_loss': 0.024398759878866853, 'temp': 0.8974043169216803, 'alpha_loss': 6.471588234455265, 'alpha': 0.3587417181646615, 'critic_loss': 912.1214613886605, 'actor_loss': 114.53555043538411, 'time_step': 0.024531050732261257, 'td_error': 217.0722934202306, 'init_value': -179.95654296875, 'ave_value': -104.84078154369517} step=12996
2022-04-20 16:16.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.38 [info     ] CQL_20220420161045: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00039126859073750455, 'time_algorithm_update': 0.024140771369487918, 'temp_loss': -0.03076560367770188, 'temp': 0.8971944582044032, 'alpha_loss': 6.311038533846538, 'alpha': 0.34816012762443366, 'critic_loss': 909.5414121750503, 'actor_loss': 114.23845509757773, 'time_step': 0.024632664451822203, 'td_error': 134.03472370837702, 'init_value': -174.57583618164062, 'ave_value': -102.75446155368208} step=13338
2022-04-20 16:16.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.47 [info     ] CQL_20220420161045: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003873625693962588, 'time_algorithm_update': 0.02369121849885461, 'temp_loss': 0.006991552417738396, 'temp': 0.8976404680843242, 'alpha_loss': 6.216952540023982, 'alpha': 0.33811615634034253, 'critic_loss': 903.3217696697392, 'actor_loss': 113.87556412903189, 'time_step': 0.02417518590625964, 'td_error': 155.1074466359517, 'init_value': -169.5410919189453, 'ave_value': -102.14118490956724} step=13680
2022-04-20 16:16.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:16.56 [info     ] CQL_20220420161045: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003765828428212662, 'time_algorithm_update': 0.023555037570975677, 'temp_loss': 0.046699973732799466, 'temp': 0.8968462835975558, 'alpha_loss': 5.7386603473919875, 'alpha': 0.3280173906108789, 'critic_loss': 905.3402399431195, 'actor_loss': 113.85898835477774, 'time_step': 0.024026254464311208, 'td_error': 104.50728514879921, 'init_value': -168.8584442138672, 'ave_value': -101.99569341210497} step=14022
2022-04-20 16:16.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.05 [info     ] CQL_20220420161045: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00039727227729663514, 'time_algorithm_update': 0.025491566685905232, 'temp_loss': 0.08529373702772877, 'temp': 0.8946250084547969, 'alpha_loss': 5.427656707707902, 'alpha': 0.3188913295492094, 'critic_loss': 898.5708113107067, 'actor_loss': 113.39123225072672, 'time_step': 0.025983328707734045, 'td_error': 134.17084289424963, 'init_value': -170.77658081054688, 'ave_value': -102.0962809060071} step=14364
2022-04-20 16:17.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.14 [info     ] CQL_20220420161045: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00039314735702603883, 'time_algorithm_update': 0.02529704431344194, 'temp_loss': 0.020003556878420346, 'temp': 0.8934441449349386, 'alpha_loss': 5.432507023476718, 'alpha': 0.3096184997182143, 'critic_loss': 892.0978147272479, 'actor_loss': 113.09307876943845, 'time_step': 0.025786422846610087, 'td_error': 127.21286738384356, 'init_value': -170.51641845703125, 'ave_value': -102.8982472300959} step=14706
2022-04-20 16:17.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.24 [info     ] CQL_20220420161045: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003920340398598833, 'time_algorithm_update': 0.025658507793270356, 'temp_loss': 0.068559935556082, 'temp': 0.8926671997845521, 'alpha_loss': 5.134326201433327, 'alpha': 0.3006554547457667, 'critic_loss': 886.7922627409996, 'actor_loss': 112.68458456742137, 'time_step': 0.026149671677260372, 'td_error': 120.14323042419468, 'init_value': -168.64077758789062, 'ave_value': -102.69014680915521} step=15048
2022-04-20 16:17.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.33 [info     ] CQL_20220420161045: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003955141145583482, 'time_algorithm_update': 0.025401000390973007, 'temp_loss': 0.08540527990403754, 'temp': 0.8900210860877009, 'alpha_loss': 5.508923925154391, 'alpha': 0.2915353333218056, 'critic_loss': 884.7877946820175, 'actor_loss': 112.8947297034905, 'time_step': 0.02589725960067838, 'td_error': 190.71442240398977, 'init_value': -168.64700317382812, 'ave_value': -102.9742641144776} step=15390
2022-04-20 16:17.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.42 [info     ] CQL_20220420161045: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003990855133324339, 'time_algorithm_update': 0.02571674048552039, 'temp_loss': 0.0857869957775226, 'temp': 0.8880282575972596, 'alpha_loss': 5.242056910754644, 'alpha': 0.282288575555846, 'critic_loss': 891.18108952394, 'actor_loss': 112.98582911351969, 'time_step': 0.026213132847122282, 'td_error': 183.31810855201292, 'init_value': -162.8031463623047, 'ave_value': -102.09795818703937} step=15732
2022-04-20 16:17.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:17.51 [info     ] CQL_20220420161045: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003984845869722422, 'time_algorithm_update': 0.025459857014884724, 'temp_loss': 0.12617433254133192, 'temp': 0.885555546011841, 'alpha_loss': 5.274325695651316, 'alpha': 0.2734157009082928, 'critic_loss': 892.2032233344185, 'actor_loss': 112.95467581944159, 'time_step': 0.02595626262196323, 'td_error': 193.78632856095626, 'init_value': -163.51620483398438, 'ave_value': -103.06855135735225} step=16074
2022-04-20 16:17.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.01 [info     ] CQL_20220420161045: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003935028935036464, 'time_algorithm_update': 0.02548045651954517, 'temp_loss': 0.15808361467540438, 'temp': 0.8818476562611541, 'alpha_loss': 4.386067653260036, 'alpha': 0.26545147728501706, 'critic_loss': 891.4314573410659, 'actor_loss': 112.50626063207437, 'time_step': 0.02597188531306752, 'td_error': 163.7235282417557, 'init_value': -160.56454467773438, 'ave_value': -100.32836849808491} step=16416
2022-04-20 16:18.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.10 [info     ] CQL_20220420161045: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00039165619521113165, 'time_algorithm_update': 0.025513977335210433, 'temp_loss': 0.16093947253685598, 'temp': 0.8780266942336545, 'alpha_loss': 4.619358633693896, 'alpha': 0.2578663028645934, 'critic_loss': 885.9255540635851, 'actor_loss': 112.31893818280851, 'time_step': 0.026003457649409423, 'td_error': 137.34211440137054, 'init_value': -160.46194458007812, 'ave_value': -102.05610390329281} step=16758
2022-04-20 16:18.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:18.19 [info     ] CQL_20220420161045: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003788973155774568, 'time_algorithm_update': 0.024849773150438455, 'temp_loss': 0.06550338566956813, 'temp': 0.8746444251802232, 'alpha_loss': 4.736183426184961, 'alpha': 0.2499901498096031, 'critic_loss': 887.777971234238, 'actor_loss': 112.5354890656053, 'time_step': 0.02532272171555904, 'td_error': 182.61304448689816, 'init_value': -167.6997528076172, 'ave_value': -103.7490180090251} step=17100
2022-04-20 16:18.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420161045/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:18.20 [info     ] FQE_20220420161819: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00013204942266625095, 'time_algorithm_update': 0.0038751521742487528, 'loss': 0.0073885489409475265, 'time_step': 0.004069507840168045, 'init_value': -0.36022889614105225, 'ave_value': -0.2822167189070233, 'soft_opc': nan} step=166




2022-04-20 16:18.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.21 [info     ] FQE_20220420161819: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001415904745998153, 'time_algorithm_update': 0.004176760294351233, 'loss': 0.0062899324434237125, 'time_step': 0.004384504743369229, 'init_value': -0.511821985244751, 'ave_value': -0.38861810713670814, 'soft_opc': nan} step=332




2022-04-20 16:18.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.22 [info     ] FQE_20220420161819: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013914309352277274, 'time_algorithm_update': 0.003858067903173975, 'loss': 0.006271909819971995, 'time_step': 0.0040602856371776165, 'init_value': -0.5898547172546387, 'ave_value': -0.43100019894741676, 'soft_opc': nan} step=498




2022-04-20 16:18.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.22 [info     ] FQE_20220420161819: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014480338039168393, 'time_algorithm_update': 0.0037624405091067395, 'loss': 0.006570713249517672, 'time_step': 0.003972798944955848, 'init_value': -0.6572728157043457, 'ave_value': -0.4568848104224549, 'soft_opc': nan} step=664




2022-04-20 16:18.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.23 [info     ] FQE_20220420161819: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001533289989793157, 'time_algorithm_update': 0.004608174404466009, 'loss': 0.006209581379262528, 'time_step': 0.004833242979394384, 'init_value': -0.7019250392913818, 'ave_value': -0.4819868660271839, 'soft_opc': nan} step=830




2022-04-20 16:18.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.24 [info     ] FQE_20220420161819: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015928802720035416, 'time_algorithm_update': 0.004437344620026738, 'loss': 0.0059683894337413, 'time_step': 0.004666848355029003, 'init_value': -0.7464811205863953, 'ave_value': -0.4989477159438638, 'soft_opc': nan} step=996




2022-04-20 16:18.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.25 [info     ] FQE_20220420161819: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015588697180690537, 'time_algorithm_update': 0.003502206630017384, 'loss': 0.005880035603059882, 'time_step': 0.0037293218704591313, 'init_value': -0.8213814496994019, 'ave_value': -0.5431447884873362, 'soft_opc': nan} step=1162




2022-04-20 16:18.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.26 [info     ] FQE_20220420161819: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015575196369584785, 'time_algorithm_update': 0.004436392381966832, 'loss': 0.0056815656429403515, 'time_step': 0.0046632562775209725, 'init_value': -0.908126711845398, 'ave_value': -0.6178257384010264, 'soft_opc': nan} step=1328




2022-04-20 16:18.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.26 [info     ] FQE_20220420161819: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015663095267422228, 'time_algorithm_update': 0.004490723092871976, 'loss': 0.005305313486998609, 'time_step': 0.0047223165810826315, 'init_value': -0.9319308996200562, 'ave_value': -0.612525702576648, 'soft_opc': nan} step=1494




2022-04-20 16:18.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.27 [info     ] FQE_20220420161819: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015593293201492494, 'time_algorithm_update': 0.004433325974338026, 'loss': 0.005298329344024904, 'time_step': 0.004661122000361064, 'init_value': -1.014739990234375, 'ave_value': -0.6533791930960106, 'soft_opc': nan} step=1660




2022-04-20 16:18.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.28 [info     ] FQE_20220420161819: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015403850969061795, 'time_algorithm_update': 0.00384157536977745, 'loss': 0.0050012588904923706, 'time_step': 0.00406348418040448, 'init_value': -1.064960241317749, 'ave_value': -0.680115200250267, 'soft_opc': nan} step=1826




2022-04-20 16:18.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.29 [info     ] FQE_20220420161819: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016253971191773932, 'time_algorithm_update': 0.0044414465685924855, 'loss': 0.004865570205645299, 'time_step': 0.004674766437116876, 'init_value': -1.101299524307251, 'ave_value': -0.683588177237559, 'soft_opc': nan} step=1992




2022-04-20 16:18.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.30 [info     ] FQE_20220420161819: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015536704695368387, 'time_algorithm_update': 0.004486856690372329, 'loss': 0.004832750158973532, 'time_step': 0.004712179482701313, 'init_value': -1.2091560363769531, 'ave_value': -0.76498443451752, 'soft_opc': nan} step=2158




2022-04-20 16:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.30 [info     ] FQE_20220420161819: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015717673014445477, 'time_algorithm_update': 0.003937971161072512, 'loss': 0.004936189534484847, 'time_step': 0.00416644797267684, 'init_value': -1.3452587127685547, 'ave_value': -0.8505344959361865, 'soft_opc': nan} step=2324




2022-04-20 16:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.31 [info     ] FQE_20220420161819: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001600578606846821, 'time_algorithm_update': 0.004536927464496659, 'loss': 0.005078191718883274, 'time_step': 0.004770593470837696, 'init_value': -1.3780521154403687, 'ave_value': -0.8533041155687323, 'soft_opc': nan} step=2490




2022-04-20 16:18.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.32 [info     ] FQE_20220420161819: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015954080834446182, 'time_algorithm_update': 0.004552026829087591, 'loss': 0.005329763742601387, 'time_step': 0.004785024976155844, 'init_value': -1.390763282775879, 'ave_value': -0.8518953413341765, 'soft_opc': nan} step=2656




2022-04-20 16:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.33 [info     ] FQE_20220420161819: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015467189880738775, 'time_algorithm_update': 0.004446772207696754, 'loss': 0.005388937295829675, 'time_step': 0.004673470933753324, 'init_value': -1.4970036745071411, 'ave_value': -0.9353786018886813, 'soft_opc': nan} step=2822




2022-04-20 16:18.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.34 [info     ] FQE_20220420161819: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015391930040106717, 'time_algorithm_update': 0.003764866346336273, 'loss': 0.005407114815477461, 'time_step': 0.003987007830516401, 'init_value': -1.550716519355774, 'ave_value': -0.9615652462115158, 'soft_opc': nan} step=2988




2022-04-20 16:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.35 [info     ] FQE_20220420161819: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001586991620351033, 'time_algorithm_update': 0.004467161304979439, 'loss': 0.00571168888450028, 'time_step': 0.004701319947300187, 'init_value': -1.6146228313446045, 'ave_value': -0.9969149225500521, 'soft_opc': nan} step=3154




2022-04-20 16:18.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.36 [info     ] FQE_20220420161819: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016308692564447242, 'time_algorithm_update': 0.004462638533259013, 'loss': 0.005975798591522567, 'time_step': 0.00469801655734878, 'init_value': -1.6478960514068604, 'ave_value': -1.0105066706887909, 'soft_opc': nan} step=3320




2022-04-20 16:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.36 [info     ] FQE_20220420161819: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015478967184043792, 'time_algorithm_update': 0.003846172826835908, 'loss': 0.005946443712826908, 'time_step': 0.0040719997451966064, 'init_value': -1.6956802606582642, 'ave_value': -1.0371793162446838, 'soft_opc': nan} step=3486




2022-04-20 16:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.37 [info     ] FQE_20220420161819: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015547476619122977, 'time_algorithm_update': 0.0043290393898286015, 'loss': 0.006107373137857361, 'time_step': 0.00455460778201919, 'init_value': -1.7302935123443604, 'ave_value': -1.0689072191367044, 'soft_opc': nan} step=3652




2022-04-20 16:18.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.38 [info     ] FQE_20220420161819: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016258279961275766, 'time_algorithm_update': 0.004566149539258106, 'loss': 0.006445558807610658, 'time_step': 0.0048006255942654895, 'init_value': -1.8005186319351196, 'ave_value': -1.129680373977769, 'soft_opc': nan} step=3818




2022-04-20 16:18.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.39 [info     ] FQE_20220420161819: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001597447567675487, 'time_algorithm_update': 0.004459961351141872, 'loss': 0.006760585962665683, 'time_step': 0.00469050493585058, 'init_value': -1.84639573097229, 'ave_value': -1.1527045654158132, 'soft_opc': nan} step=3984




2022-04-20 16:18.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.40 [info     ] FQE_20220420161819: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015296849859766214, 'time_algorithm_update': 0.003608450832137142, 'loss': 0.007191725863502866, 'time_step': 0.00383392299514219, 'init_value': -1.912518858909607, 'ave_value': -1.1794804197266229, 'soft_opc': nan} step=4150




2022-04-20 16:18.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.40 [info     ] FQE_20220420161819: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001571422599884401, 'time_algorithm_update': 0.004492881786392395, 'loss': 0.007489243387637368, 'time_step': 0.004723271691655538, 'init_value': -1.9326287508010864, 'ave_value': -1.2011285164519339, 'soft_opc': nan} step=4316




2022-04-20 16:18.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.41 [info     ] FQE_20220420161819: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001596442188125059, 'time_algorithm_update': 0.004501377243593514, 'loss': 0.007745012824692909, 'time_step': 0.004731893539428711, 'init_value': -1.961674451828003, 'ave_value': -1.2144080382964766, 'soft_opc': nan} step=4482




2022-04-20 16:18.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.42 [info     ] FQE_20220420161819: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001573993499020496, 'time_algorithm_update': 0.004070391137915921, 'loss': 0.008381818821625671, 'time_step': 0.004302169903215155, 'init_value': -2.0725724697113037, 'ave_value': -1.295176394672242, 'soft_opc': nan} step=4648




2022-04-20 16:18.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.43 [info     ] FQE_20220420161819: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015934834997337986, 'time_algorithm_update': 0.004259006086602269, 'loss': 0.008439891319441417, 'time_step': 0.004489778036094573, 'init_value': -2.062037944793701, 'ave_value': -1.2809702948566426, 'soft_opc': nan} step=4814




2022-04-20 16:18.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.44 [info     ] FQE_20220420161819: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015295557228915663, 'time_algorithm_update': 0.004397487065878259, 'loss': 0.008757957594654331, 'time_step': 0.004623124398380877, 'init_value': -2.141732692718506, 'ave_value': -1.365798291056375, 'soft_opc': nan} step=4980




2022-04-20 16:18.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.45 [info     ] FQE_20220420161819: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015958964106548265, 'time_algorithm_update': 0.004478412938405232, 'loss': 0.009047384426440954, 'time_step': 0.004712274275630353, 'init_value': -2.1324355602264404, 'ave_value': -1.3437200689671545, 'soft_opc': nan} step=5146




2022-04-20 16:18.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.45 [info     ] FQE_20220420161819: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015583957534238515, 'time_algorithm_update': 0.0036645653736160464, 'loss': 0.009370802443738219, 'time_step': 0.003891993717974927, 'init_value': -2.2333717346191406, 'ave_value': -1.4208641136115467, 'soft_opc': nan} step=5312




2022-04-20 16:18.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.46 [info     ] FQE_20220420161819: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015968587025102363, 'time_algorithm_update': 0.004589431257133025, 'loss': 0.010029083626283086, 'time_step': 0.004825435489057058, 'init_value': -2.2769250869750977, 'ave_value': -1.4595241242331696, 'soft_opc': nan} step=5478




2022-04-20 16:18.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.47 [info     ] FQE_20220420161819: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001550309629325407, 'time_algorithm_update': 0.004496055913258748, 'loss': 0.01026095162883558, 'time_step': 0.00472381603286927, 'init_value': -2.3266944885253906, 'ave_value': -1.4571341032397236, 'soft_opc': nan} step=5644




2022-04-20 16:18.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.48 [info     ] FQE_20220420161819: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016325784016804523, 'time_algorithm_update': 0.004251830549125212, 'loss': 0.011100535466846258, 'time_step': 0.00448800713182932, 'init_value': -2.4424431324005127, 'ave_value': -1.564027880836983, 'soft_opc': nan} step=5810




2022-04-20 16:18.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.49 [info     ] FQE_20220420161819: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00018252952989325467, 'time_algorithm_update': 0.004095241247889507, 'loss': 0.01133722481836778, 'time_step': 0.004350262952138142, 'init_value': -2.4915778636932373, 'ave_value': -1.5989295207769485, 'soft_opc': nan} step=5976




2022-04-20 16:18.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.49 [info     ] FQE_20220420161819: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015676165201577796, 'time_algorithm_update': 0.004383578357926334, 'loss': 0.01157922459084608, 'time_step': 0.0046133004039166925, 'init_value': -2.5516111850738525, 'ave_value': -1.6174887005743144, 'soft_opc': nan} step=6142




2022-04-20 16:18.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.50 [info     ] FQE_20220420161819: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015469631516789817, 'time_algorithm_update': 0.004450085651443665, 'loss': 0.011877714370164168, 'time_step': 0.004676978272127818, 'init_value': -2.656773328781128, 'ave_value': -1.701779125016686, 'soft_opc': nan} step=6308




2022-04-20 16:18.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.51 [info     ] FQE_20220420161819: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015598320099244635, 'time_algorithm_update': 0.0034810433904808686, 'loss': 0.012658571460458214, 'time_step': 0.003706880362637072, 'init_value': -2.6708920001983643, 'ave_value': -1.7276576496291536, 'soft_opc': nan} step=6474




2022-04-20 16:18.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.52 [info     ] FQE_20220420161819: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015677170581128224, 'time_algorithm_update': 0.004524235265800752, 'loss': 0.012744974059540868, 'time_step': 0.004755406494600227, 'init_value': -2.683927059173584, 'ave_value': -1.7314362496909526, 'soft_opc': nan} step=6640




2022-04-20 16:18.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.53 [info     ] FQE_20220420161819: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015586542795939618, 'time_algorithm_update': 0.004540309848555599, 'loss': 0.013469000008232129, 'time_step': 0.004767985229032585, 'init_value': -2.7013378143310547, 'ave_value': -1.737235368576807, 'soft_opc': nan} step=6806




2022-04-20 16:18.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.54 [info     ] FQE_20220420161819: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001567286181162639, 'time_algorithm_update': 0.0043159278042345164, 'loss': 0.0146694478756995, 'time_step': 0.004542525992336044, 'init_value': -2.7730987071990967, 'ave_value': -1.753753517326471, 'soft_opc': nan} step=6972




2022-04-20 16:18.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.54 [info     ] FQE_20220420161819: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001567673970417804, 'time_algorithm_update': 0.0039873654583850535, 'loss': 0.015103777726769942, 'time_step': 0.0042174149708575515, 'init_value': -2.9261910915374756, 'ave_value': -1.8441010468614263, 'soft_opc': nan} step=7138




2022-04-20 16:18.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.55 [info     ] FQE_20220420161819: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001553943358271955, 'time_algorithm_update': 0.004471742963216391, 'loss': 0.015140321122549743, 'time_step': 0.004703579178775649, 'init_value': -2.972667694091797, 'ave_value': -1.8847125661399988, 'soft_opc': nan} step=7304




2022-04-20 16:18.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.56 [info     ] FQE_20220420161819: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001577526690012001, 'time_algorithm_update': 0.004516430647976427, 'loss': 0.015827882796503514, 'time_step': 0.004746652511228998, 'init_value': -3.018803119659424, 'ave_value': -1.8772579949550532, 'soft_opc': nan} step=7470




2022-04-20 16:18.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.57 [info     ] FQE_20220420161819: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00018779340996799698, 'time_algorithm_update': 0.005425756236156785, 'loss': 0.01650250650709495, 'time_step': 0.00568375960890069, 'init_value': -3.039358139038086, 'ave_value': -1.902294931251992, 'soft_opc': nan} step=7636




2022-04-20 16:18.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.58 [info     ] FQE_20220420161819: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015950777444494776, 'time_algorithm_update': 0.004508472350706537, 'loss': 0.017297378225050057, 'time_step': 0.0047420248927840265, 'init_value': -3.1155035495758057, 'ave_value': -1.9325287123386925, 'soft_opc': nan} step=7802




2022-04-20 16:18.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.59 [info     ] FQE_20220420161819: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015596022088843655, 'time_algorithm_update': 0.004554774387773261, 'loss': 0.017381892568313975, 'time_step': 0.0047869366335581585, 'init_value': -3.104949712753296, 'ave_value': -1.9434247875602932, 'soft_opc': nan} step=7968




2022-04-20 16:18.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:18.59 [info     ] FQE_20220420161819: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015405287225562405, 'time_algorithm_update': 0.003741992525307529, 'loss': 0.0181838173744639, 'time_step': 0.003972194280969091, 'init_value': -3.2001309394836426, 'ave_value': -1.95670650686968, 'soft_opc': nan} step=8134




2022-04-20 16:18.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:19.00 [info     ] FQE_20220420161819: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015888731163668344, 'time_algorithm_update': 0.0044936645461852295, 'loss': 0.01820250407439174, 'time_step': 0.004724199513354933, 'init_value': -3.147757053375244, 'ave_value': -1.9095737334198175, 'soft_opc': nan} step=8300




2022-04-20 16:19.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161819/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:19.01 [info     ] Directory is created at d3rlpy_logs/FQE_20220420161901
2022-04-20 16:19.01 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:19.01 [debug    ] Building models...
2022-04-20 16:19.01 [debug    ] Models have been built.
2022-04-20 16:19.01 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420161901/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:19.02 [info     ] FQE_20220420161901: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016156809274540392, 'time_algorithm_update': 0.004250411377396695, 'loss': 0.022886072810695962, 'time_step': 0.0044848530791526614, 'init_value': -1.1495745182037354, 'ave_value': -1.1973396798914617, 'soft_opc': nan} step=344




2022-04-20 16:19.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.04 [info     ] FQE_20220420161901: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016466614811919456, 'time_algorithm_update': 0.004490309676458669, 'loss': 0.02137537264037704, 'time_step': 0.0047288038009820985, 'init_value': -2.00504732131958, 'ave_value': -2.0689431876481117, 'soft_opc': nan} step=688




2022-04-20 16:19.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.06 [info     ] FQE_20220420161901: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016620963118797125, 'time_algorithm_update': 0.004103019487026126, 'loss': 0.025246181847462648, 'time_step': 0.004342415998148364, 'init_value': -3.0217013359069824, 'ave_value': -3.1271961925803002, 'soft_opc': nan} step=1032




2022-04-20 16:19.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.07 [info     ] FQE_20220420161901: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016629765200060467, 'time_algorithm_update': 0.004515746998232465, 'loss': 0.02746866718692662, 'time_step': 0.004751954660859219, 'init_value': -3.7591021060943604, 'ave_value': -3.945110984804394, 'soft_opc': nan} step=1376




2022-04-20 16:19.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.09 [info     ] FQE_20220420161901: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001662733942963356, 'time_algorithm_update': 0.0040754273880359735, 'loss': 0.03312487914335242, 'time_step': 0.004315117070841235, 'init_value': -4.707487106323242, 'ave_value': -5.01295886214252, 'soft_opc': nan} step=1720




2022-04-20 16:19.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.11 [info     ] FQE_20220420161901: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016914827879085096, 'time_algorithm_update': 0.004506523525992105, 'loss': 0.037496036500670016, 'time_step': 0.004754324292027673, 'init_value': -5.277748107910156, 'ave_value': -5.734376016447136, 'soft_opc': nan} step=2064




2022-04-20 16:19.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.12 [info     ] FQE_20220420161901: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016376445459765057, 'time_algorithm_update': 0.0042196484499199445, 'loss': 0.04471495329283264, 'time_step': 0.004456743944522946, 'init_value': -6.0123677253723145, 'ave_value': -6.671379064466502, 'soft_opc': nan} step=2408




2022-04-20 16:19.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.14 [info     ] FQE_20220420161901: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017149988995041957, 'time_algorithm_update': 0.004159092903137207, 'loss': 0.053545813682640706, 'time_step': 0.004405581673910451, 'init_value': -6.387874603271484, 'ave_value': -7.199412351309716, 'soft_opc': nan} step=2752




2022-04-20 16:19.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.16 [info     ] FQE_20220420161901: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016613131345704545, 'time_algorithm_update': 0.004489731650019801, 'loss': 0.06284545216070445, 'time_step': 0.004730995311293491, 'init_value': -6.935784339904785, 'ave_value': -7.873823247675423, 'soft_opc': nan} step=3096




2022-04-20 16:19.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.17 [info     ] FQE_20220420161901: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016673567683197731, 'time_algorithm_update': 0.004079711991687154, 'loss': 0.0760178753570112, 'time_step': 0.004319672667702963, 'init_value': -7.409530162811279, 'ave_value': -8.604183311827548, 'soft_opc': nan} step=3440




2022-04-20 16:19.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.19 [info     ] FQE_20220420161901: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001754143903421801, 'time_algorithm_update': 0.004529653593551281, 'loss': 0.0866520902883738, 'time_step': 0.004780445681061856, 'init_value': -7.8122477531433105, 'ave_value': -9.205812928107408, 'soft_opc': nan} step=3784




2022-04-20 16:19.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.21 [info     ] FQE_20220420161901: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016865134239196777, 'time_algorithm_update': 0.004134964804316676, 'loss': 0.10256163667898278, 'time_step': 0.004376551439595777, 'init_value': -8.534805297851562, 'ave_value': -10.11506507928307, 'soft_opc': nan} step=4128




2022-04-20 16:19.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.23 [info     ] FQE_20220420161901: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016602527263552644, 'time_algorithm_update': 0.004476197930269463, 'loss': 0.11695520022375033, 'time_step': 0.00472096371096234, 'init_value': -8.891841888427734, 'ave_value': -10.686959100628759, 'soft_opc': nan} step=4472




2022-04-20 16:19.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.24 [info     ] FQE_20220420161901: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016478882279506949, 'time_algorithm_update': 0.004262438347173291, 'loss': 0.1318937770258852, 'time_step': 0.004500196423641471, 'init_value': -9.384714126586914, 'ave_value': -11.430474520186046, 'soft_opc': nan} step=4816




2022-04-20 16:19.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.26 [info     ] FQE_20220420161901: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016974293908407522, 'time_algorithm_update': 0.004113898720852164, 'loss': 0.15022861464815432, 'time_step': 0.004357613796411559, 'init_value': -9.863456726074219, 'ave_value': -11.992933283840214, 'soft_opc': nan} step=5160




2022-04-20 16:19.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.28 [info     ] FQE_20220420161901: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017038958017216173, 'time_algorithm_update': 0.004521424687185953, 'loss': 0.16703131748904843, 'time_step': 0.004767015922901242, 'init_value': -10.229879379272461, 'ave_value': -12.511146293593956, 'soft_opc': nan} step=5504




2022-04-20 16:19.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.29 [info     ] FQE_20220420161901: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.000164321688718574, 'time_algorithm_update': 0.004203323708024136, 'loss': 0.18194472172492465, 'time_step': 0.004441395055416019, 'init_value': -10.359760284423828, 'ave_value': -12.779574713567357, 'soft_opc': nan} step=5848




2022-04-20 16:19.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.31 [info     ] FQE_20220420161901: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001687830270722855, 'time_algorithm_update': 0.004529625177383423, 'loss': 0.20032858613304533, 'time_step': 0.0047750292822372085, 'init_value': -10.842906951904297, 'ave_value': -13.545877113374504, 'soft_opc': nan} step=6192




2022-04-20 16:19.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.33 [info     ] FQE_20220420161901: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016444436339444892, 'time_algorithm_update': 0.004116596870644148, 'loss': 0.2173832343663847, 'time_step': 0.004352993743364201, 'init_value': -11.35991096496582, 'ave_value': -14.278122883847168, 'soft_opc': nan} step=6536




2022-04-20 16:19.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.34 [info     ] FQE_20220420161901: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001663094343141068, 'time_algorithm_update': 0.004428490649822147, 'loss': 0.23768891211393353, 'time_step': 0.004669422327085983, 'init_value': -11.419404983520508, 'ave_value': -14.483376773950216, 'soft_opc': nan} step=6880




2022-04-20 16:19.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.36 [info     ] FQE_20220420161901: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001648740712986436, 'time_algorithm_update': 0.0032251151495201642, 'loss': 0.2600136061366864, 'time_step': 0.0034651090932446855, 'init_value': -11.782939910888672, 'ave_value': -15.01302926129586, 'soft_opc': nan} step=7224




2022-04-20 16:19.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.37 [info     ] FQE_20220420161901: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.000163746434588765, 'time_algorithm_update': 0.0034982749195986017, 'loss': 0.27618474063062826, 'time_step': 0.003736938848051914, 'init_value': -12.075124740600586, 'ave_value': -15.418228163670848, 'soft_opc': nan} step=7568




2022-04-20 16:19.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.39 [info     ] FQE_20220420161901: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016817519831102947, 'time_algorithm_update': 0.003470285687335702, 'loss': 0.29544230445753783, 'time_step': 0.0037120948004168135, 'init_value': -12.381819725036621, 'ave_value': -15.941160197488896, 'soft_opc': nan} step=7912




2022-04-20 16:19.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.40 [info     ] FQE_20220420161901: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016503209291502486, 'time_algorithm_update': 0.003416823093281236, 'loss': 0.31254753541352964, 'time_step': 0.003655843263448671, 'init_value': -12.702455520629883, 'ave_value': -16.4727165221094, 'soft_opc': nan} step=8256




2022-04-20 16:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.41 [info     ] FQE_20220420161901: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016501407290613927, 'time_algorithm_update': 0.0034609721150509146, 'loss': 0.32038326450450305, 'time_step': 0.0037044654058855635, 'init_value': -12.517908096313477, 'ave_value': -16.47375935391263, 'soft_opc': nan} step=8600




2022-04-20 16:19.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.43 [info     ] FQE_20220420161901: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016439723414044048, 'time_algorithm_update': 0.0034473129483156427, 'loss': 0.34469597727915835, 'time_step': 0.0036888704743496207, 'init_value': -13.181283950805664, 'ave_value': -17.316455593302443, 'soft_opc': nan} step=8944




2022-04-20 16:19.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.44 [info     ] FQE_20220420161901: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016206364298975744, 'time_algorithm_update': 0.0033638796140981276, 'loss': 0.3591700380039943, 'time_step': 0.0036020396753799083, 'init_value': -13.121686935424805, 'ave_value': -17.42159161084407, 'soft_opc': nan} step=9288




2022-04-20 16:19.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.45 [info     ] FQE_20220420161901: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001656357632126919, 'time_algorithm_update': 0.003469758948614431, 'loss': 0.3713913225991175, 'time_step': 0.0037072966265123946, 'init_value': -13.248348236083984, 'ave_value': -17.763024309183564, 'soft_opc': nan} step=9632




2022-04-20 16:19.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.47 [info     ] FQE_20220420161901: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016379910846089208, 'time_algorithm_update': 0.0034894679867944053, 'loss': 0.394905717047148, 'time_step': 0.0037285401377567026, 'init_value': -13.257671356201172, 'ave_value': -18.030963431151065, 'soft_opc': nan} step=9976




2022-04-20 16:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.48 [info     ] FQE_20220420161901: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016620408656985261, 'time_algorithm_update': 0.0034371455048405847, 'loss': 0.4114676566718709, 'time_step': 0.003678653129311495, 'init_value': -13.457622528076172, 'ave_value': -18.408906071277354, 'soft_opc': nan} step=10320




2022-04-20 16:19.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.50 [info     ] FQE_20220420161901: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016429050024165662, 'time_algorithm_update': 0.0034336121969444806, 'loss': 0.434584217665847, 'time_step': 0.00367110482482023, 'init_value': -13.542901039123535, 'ave_value': -18.78512250401147, 'soft_opc': nan} step=10664




2022-04-20 16:19.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.51 [info     ] FQE_20220420161901: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001643265402594278, 'time_algorithm_update': 0.0034337230893068536, 'loss': 0.4562135377335687, 'time_step': 0.0036690727222797484, 'init_value': -14.174535751342773, 'ave_value': -19.50964351609215, 'soft_opc': nan} step=11008




2022-04-20 16:19.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.52 [info     ] FQE_20220420161901: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000162522460139075, 'time_algorithm_update': 0.0034350011237832003, 'loss': 0.4720547360389732, 'time_step': 0.0036728146464325663, 'init_value': -13.712957382202148, 'ave_value': -19.3347420524638, 'soft_opc': nan} step=11352




2022-04-20 16:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.54 [info     ] FQE_20220420161901: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.000162734541782113, 'time_algorithm_update': 0.0034454762935638428, 'loss': 0.4727942188930979, 'time_step': 0.003681301377540411, 'init_value': -13.673009872436523, 'ave_value': -19.55696108749321, 'soft_opc': nan} step=11696




2022-04-20 16:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.55 [info     ] FQE_20220420161901: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001666573591010515, 'time_algorithm_update': 0.003444276576818422, 'loss': 0.49604754309646437, 'time_step': 0.0036829100098720816, 'init_value': -14.003089904785156, 'ave_value': -20.006315577875924, 'soft_opc': nan} step=12040




2022-04-20 16:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.57 [info     ] FQE_20220420161901: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001679804435996122, 'time_algorithm_update': 0.0034878260867540227, 'loss': 0.5111197688438128, 'time_step': 0.0037307635296222777, 'init_value': -14.370104789733887, 'ave_value': -20.49171989138599, 'soft_opc': nan} step=12384




2022-04-20 16:19.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.58 [info     ] FQE_20220420161901: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016062273535617563, 'time_algorithm_update': 0.003430803847867389, 'loss': 0.5186420084074747, 'time_step': 0.0036629313646360886, 'init_value': -14.14311408996582, 'ave_value': -20.693620392304283, 'soft_opc': nan} step=12728




2022-04-20 16:19.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:19.59 [info     ] FQE_20220420161901: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001629736534384794, 'time_algorithm_update': 0.0034699647925620857, 'loss': 0.5290718180733884, 'time_step': 0.0037088830803715905, 'init_value': -13.897310256958008, 'ave_value': -20.750332382549573, 'soft_opc': nan} step=13072




2022-04-20 16:19.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.01 [info     ] FQE_20220420161901: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016543823619221532, 'time_algorithm_update': 0.003484509018964546, 'loss': 0.5278879768637463, 'time_step': 0.003724356723386188, 'init_value': -14.076370239257812, 'ave_value': -21.126654404673626, 'soft_opc': nan} step=13416




2022-04-20 16:20.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.02 [info     ] FQE_20220420161901: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001638476238694302, 'time_algorithm_update': 0.0034740726615107337, 'loss': 0.5418397295667768, 'time_step': 0.003712160642756972, 'init_value': -14.01512622833252, 'ave_value': -21.147311415432675, 'soft_opc': nan} step=13760




2022-04-20 16:20.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.03 [info     ] FQE_20220420161901: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001652795214985692, 'time_algorithm_update': 0.0035698455433512844, 'loss': 0.5517031533188771, 'time_step': 0.003810859696809636, 'init_value': -14.298571586608887, 'ave_value': -21.37965247452779, 'soft_opc': nan} step=14104




2022-04-20 16:20.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.05 [info     ] FQE_20220420161901: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001643958479859108, 'time_algorithm_update': 0.003464177597400754, 'loss': 0.5724498103643486, 'time_step': 0.0037062265152154965, 'init_value': -14.145745277404785, 'ave_value': -21.405611441555426, 'soft_opc': nan} step=14448




2022-04-20 16:20.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.06 [info     ] FQE_20220420161901: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016514852989551633, 'time_algorithm_update': 0.0034712650055109067, 'loss': 0.5823709445485715, 'time_step': 0.003711814797201822, 'init_value': -14.507942199707031, 'ave_value': -21.618631488241693, 'soft_opc': nan} step=14792




2022-04-20 16:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.08 [info     ] FQE_20220420161901: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016567665477131688, 'time_algorithm_update': 0.003459861112195392, 'loss': 0.591092259937161, 'time_step': 0.00370103120803833, 'init_value': -14.67868709564209, 'ave_value': -21.929277571270596, 'soft_opc': nan} step=15136




2022-04-20 16:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.09 [info     ] FQE_20220420161901: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001644693141759828, 'time_algorithm_update': 0.0034685301226238872, 'loss': 0.6020278914095184, 'time_step': 0.0037061877028886662, 'init_value': -14.65489387512207, 'ave_value': -22.160129345292308, 'soft_opc': nan} step=15480




2022-04-20 16:20.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.10 [info     ] FQE_20220420161901: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016477357509524324, 'time_algorithm_update': 0.0028689399708149046, 'loss': 0.6107943905517459, 'time_step': 0.003108333709628083, 'init_value': -15.020608901977539, 'ave_value': -22.800492241650584, 'soft_opc': nan} step=15824




2022-04-20 16:20.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.11 [info     ] FQE_20220420161901: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016543823619221532, 'time_algorithm_update': 0.003256368082623149, 'loss': 0.6091590540180373, 'time_step': 0.0034956398398377176, 'init_value': -14.964544296264648, 'ave_value': -22.88070533768366, 'soft_opc': nan} step=16168




2022-04-20 16:20.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.13 [info     ] FQE_20220420161901: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016439515490864598, 'time_algorithm_update': 0.0034527362779129384, 'loss': 0.6354194265292135, 'time_step': 0.0036931952764821608, 'init_value': -14.742705345153809, 'ave_value': -22.914662563183285, 'soft_opc': nan} step=16512




2022-04-20 16:20.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.14 [info     ] FQE_20220420161901: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016323494356732036, 'time_algorithm_update': 0.003525931474774383, 'loss': 0.636801992838141, 'time_step': 0.0037651263004125552, 'init_value': -14.303932189941406, 'ave_value': -22.554308565467732, 'soft_opc': nan} step=16856




2022-04-20 16:20.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:20.16 [info     ] FQE_20220420161901: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.000163852475410284, 'time_algorithm_update': 0.0035485348036122877, 'loss': 0.6454626838081018, 'time_step': 0.0037863906039748083, 'init_value': -14.790427207946777, 'ave_value': -22.972249737226715, 'soft_opc': nan} step=17200




2022-04-20 16:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420161901/model_17200.pt
search iteration:  6
using hyper params:  [0.008762173788455153, 0.003332115014684912, 7.002783808884287e-05, 1]
2022-04-20 16:20.16 [debug    ] RoundIterator is selected.
2022-04-20 16:20.16 [info     ] Directory is created at d3rlpy_logs/CQL_20220420162016
2022-04-20 16:20.16 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:20.16 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:20.16 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420162016/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.008762173788455153, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'wei

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:20.24 [info     ] CQL_20220420162016: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030503286952860873, 'time_algorithm_update': 0.02405697072458546, 'temp_loss': 4.618887755780192, 'temp': 0.9889669664073408, 'alpha_loss': -13.804201156772368, 'alpha': 1.016488091987476, 'critic_loss': 18.649004102450366, 'actor_loss': -1.3067673720876898, 'time_step': 0.024459410132023327, 'td_error': 4.884068598839054, 'init_value': -1.0016162395477295, 'ave_value': 0.28827390003479547} step=342
2022-04-20 16:20.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:20.33 [info     ] CQL_20220420162016: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003112568492777863, 'time_algorithm_update': 0.024180868215728225, 'temp_loss': 3.8789928726285523, 'temp': 0.9672128561644526, 'alpha_loss': -6.81284004833266, 'alpha': 1.0423265841969274, 'critic_loss': 21.93012957545052, 'actor_loss': -0.2436068342339618, 'time_step': 0.024588564683122243, 'td_error': 3.3621744488689944, 'init_value': -1.765472412109375, 'ave_value': 1.0308875433382418} step=684
2022-04-20 16:20.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:20.42 [info     ] CQL_20220420162016: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003114478629932069, 'time_algorithm_update': 0.024177273811652647, 'temp_loss': 3.117641163848297, 'temp': 0.948343255889346, 'alpha_loss': -2.586626404406209, 'alpha': 1.057765265305837, 'critic_loss': 37.10812604357625, 'actor_loss': 1.0005317562374108, 'time_step': 0.024589641052379943, 'td_error': 4.471353975229842, 'init_value': -4.2772321701049805, 'ave_value': 0.3265685067345967} step=1026
2022-04-20 16:20.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:20.51 [info     ] CQL_20220420162016: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003098632857116342, 'time_algorithm_update': 0.024324881403069747, 'temp_loss': 2.5875728942497433, 'temp': 0.9313179146476657, 'alpha_loss': 0.5877546946741423, 'alpha': 1.0620931373005025, 'critic_loss': 57.6523374144794, 'actor_loss': 2.383194180085645, 'time_step': 0.02473474455158613, 'td_error': 6.985116002965043, 'init_value': -7.82586669921875, 'ave_value': -0.29687882546234773} step=1368
2022-04-20 16:20.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.00 [info     ] CQL_20220420162016: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00031747873763591925, 'time_algorithm_update': 0.02417415763899597, 'temp_loss': 2.1467046427447896, 'temp': 0.9157415229326104, 'alpha_loss': 3.42322314131395, 'alpha': 1.0499939451440734, 'critic_loss': 82.94578273393954, 'actor_loss': 3.982405005491268, 'time_step': 0.024591042284379926, 'td_error': 8.575159525727388, 'init_value': -10.828947067260742, 'ave_value': -1.5972376019192172} step=1710
2022-04-20 16:21.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.09 [info     ] CQL_20220420162016: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003142447499503866, 'time_algorithm_update': 0.024138807553296896, 'temp_loss': 1.7742840473414863, 'temp': 0.9013988455833747, 'alpha_loss': 5.663200522026821, 'alpha': 1.0211216838736283, 'critic_loss': 112.36994724385222, 'actor_loss': 5.566809378172222, 'time_step': 0.024549968061391373, 'td_error': 10.86187407733913, 'init_value': -13.029932975769043, 'ave_value': -2.138542499652317} step=2052
2022-04-20 16:21.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.17 [info     ] CQL_20220420162016: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.000316750933552346, 'time_algorithm_update': 0.024100517668919255, 'temp_loss': 1.4585489046852491, 'temp': 0.8882902065912882, 'alpha_loss': 7.365699482242963, 'alpha': 0.9815598172053956, 'critic_loss': 145.71127339413292, 'actor_loss': 7.299551797889129, 'time_step': 0.024515146400496277, 'td_error': 14.842207495522164, 'init_value': -18.569538116455078, 'ave_value': -4.58953148637269} step=2394
2022-04-20 16:21.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.26 [info     ] CQL_20220420162016: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003122244662011576, 'time_algorithm_update': 0.024303045189171507, 'temp_loss': 1.1872598697916108, 'temp': 0.8764233864538851, 'alpha_loss': 8.61265154331051, 'alpha': 0.9397121535407172, 'critic_loss': 182.79205259802745, 'actor_loss': 9.20039030961823, 'time_step': 0.024714658831992344, 'td_error': 16.897756675676032, 'init_value': -22.1961669921875, 'ave_value': -5.928501589437863} step=2736
2022-04-20 16:21.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.35 [info     ] CQL_20220420162016: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00031861227158217403, 'time_algorithm_update': 0.024246366400467723, 'temp_loss': 0.9401600839929622, 'temp': 0.8654936597361202, 'alpha_loss': 9.607816661310475, 'alpha': 0.8995851842980636, 'critic_loss': 226.1169138233564, 'actor_loss': 11.42866889914574, 'time_step': 0.02466752096923471, 'td_error': 17.526761426214037, 'init_value': -25.190975189208984, 'ave_value': -7.0009323250871525} step=3078
2022-04-20 16:21.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.44 [info     ] CQL_20220420162016: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00031040286460117986, 'time_algorithm_update': 0.02436569768782945, 'temp_loss': 0.7033962001533884, 'temp': 0.8560812980459448, 'alpha_loss': 10.180821159429717, 'alpha': 0.8620257171971059, 'critic_loss': 270.90384294833353, 'actor_loss': 13.61860774413884, 'time_step': 0.02477349245060257, 'td_error': 21.685389483791543, 'init_value': -29.7366886138916, 'ave_value': -8.58847766314004} step=3420
2022-04-20 16:21.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:21.53 [info     ] CQL_20220420162016: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003160747171145434, 'time_algorithm_update': 0.025251389246935037, 'temp_loss': 0.5101676162288725, 'temp': 0.8480794752201839, 'alpha_loss': 10.920801337002313, 'alpha': 0.827650972799948, 'critic_loss': 317.3196379009046, 'actor_loss': 15.958658307616474, 'time_step': 0.025668430049517, 'td_error': 31.544949138975788, 'init_value': -36.491981506347656, 'ave_value': -11.64121549765269} step=3762
2022-04-20 16:21.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.02 [info     ] CQL_20220420162016: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003174480639005962, 'time_algorithm_update': 0.025670985032243337, 'temp_loss': 0.345684529254921, 'temp': 0.8416388840006109, 'alpha_loss': 11.52721257098237, 'alpha': 0.794799551454901, 'critic_loss': 371.057049399928, 'actor_loss': 18.630334346615083, 'time_step': 0.02608735031551785, 'td_error': 35.90011019754249, 'init_value': -40.39495849609375, 'ave_value': -13.486634405088854} step=4104
2022-04-20 16:22.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.12 [info     ] CQL_20220420162016: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00032643477121988934, 'time_algorithm_update': 0.025833836772985626, 'temp_loss': 0.2112612648872517, 'temp': 0.8369704906703436, 'alpha_loss': 12.034690796980383, 'alpha': 0.7636498932601415, 'critic_loss': 432.116307398032, 'actor_loss': 21.615735157191406, 'time_step': 0.02625935677199336, 'td_error': 40.020943849025294, 'init_value': -45.96067428588867, 'ave_value': -15.289747400670437} step=4446
2022-04-20 16:22.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.21 [info     ] CQL_20220420162016: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.000317614678053828, 'time_algorithm_update': 0.025698818658527574, 'temp_loss': 0.11380932047658147, 'temp': 0.8334845271375444, 'alpha_loss': 12.271469464776112, 'alpha': 0.7349596325068446, 'critic_loss': 496.29029926500823, 'actor_loss': 24.645490211352968, 'time_step': 0.026116276345057796, 'td_error': 31.046101824270593, 'init_value': -52.52348709106445, 'ave_value': -18.461851983070375} step=4788
2022-04-20 16:22.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.30 [info     ] CQL_20220420162016: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003178914388020833, 'time_algorithm_update': 0.025697045158921628, 'temp_loss': 0.02140443937646018, 'temp': 0.8320596746185369, 'alpha_loss': 12.328809487192254, 'alpha': 0.7081551081255862, 'critic_loss': 561.665032816212, 'actor_loss': 27.724441489281013, 'time_step': 0.026111530281646908, 'td_error': 63.4966462895834, 'init_value': -59.2003173828125, 'ave_value': -21.802246295879552} step=5130
2022-04-20 16:22.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.40 [info     ] CQL_20220420162016: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003162964045652869, 'time_algorithm_update': 0.025832122529459278, 'temp_loss': -0.11212216554024415, 'temp': 0.8330221608368277, 'alpha_loss': 12.725096143477144, 'alpha': 0.6824858688820176, 'critic_loss': 633.3260494477568, 'actor_loss': 31.213542324757714, 'time_step': 0.026246237475969637, 'td_error': 94.3896372550602, 'init_value': -65.63154602050781, 'ave_value': -24.747528288992676} step=5472
2022-04-20 16:22.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.49 [info     ] CQL_20220420162016: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003110365560877393, 'time_algorithm_update': 0.025430314024986578, 'temp_loss': -0.19408765081207305, 'temp': 0.8380716097982306, 'alpha_loss': 13.25640522806268, 'alpha': 0.6575385006199106, 'critic_loss': 710.6973912646199, 'actor_loss': 34.99983048020748, 'time_step': 0.025839146814848248, 'td_error': 81.63638024529232, 'init_value': -72.45149993896484, 'ave_value': -28.54634880495501} step=5814
2022-04-20 16:22.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:22.58 [info     ] CQL_20220420162016: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003153462159006219, 'time_algorithm_update': 0.025548068403500563, 'temp_loss': -0.2436078147396271, 'temp': 0.8457546049391317, 'alpha_loss': 12.994790623759666, 'alpha': 0.6343518417132529, 'critic_loss': 789.1759813096788, 'actor_loss': 38.65669034656725, 'time_step': 0.025962438499718382, 'td_error': 75.42798933469906, 'init_value': -80.61373138427734, 'ave_value': -32.01432078670811} step=6156
2022-04-20 16:22.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.08 [info     ] CQL_20220420162016: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003144371579265037, 'time_algorithm_update': 0.02548010586298, 'temp_loss': -0.30859034521537915, 'temp': 0.857039765656343, 'alpha_loss': 12.937858015473125, 'alpha': 0.6124180771454036, 'critic_loss': 874.6447600426033, 'actor_loss': 42.74268073924104, 'time_step': 0.02589128449646353, 'td_error': 121.76542025287543, 'init_value': -86.90716552734375, 'ave_value': -35.03806831041972} step=6498
2022-04-20 16:23.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.17 [info     ] CQL_20220420162016: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00031559509143494724, 'time_algorithm_update': 0.025747435831884196, 'temp_loss': -0.3523460864606831, 'temp': 0.8706780001085405, 'alpha_loss': 13.231801790103578, 'alpha': 0.5913315626264316, 'critic_loss': 961.2466630545276, 'actor_loss': 47.00079077447367, 'time_step': 0.02616113040879456, 'td_error': 139.47554524538876, 'init_value': -97.90327453613281, 'ave_value': -39.63370308623657} step=6840
2022-04-20 16:23.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.26 [info     ] CQL_20220420162016: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003133803083185564, 'time_algorithm_update': 0.02551986669239245, 'temp_loss': -0.3505079903427446, 'temp': 0.8853315493168189, 'alpha_loss': 13.215183666575024, 'alpha': 0.5708925715315412, 'critic_loss': 1052.8505925407187, 'actor_loss': 51.35102122568945, 'time_step': 0.025932553915949594, 'td_error': 163.40859685806956, 'init_value': -108.89517974853516, 'ave_value': -45.36970600295711} step=7182
2022-04-20 16:23.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.36 [info     ] CQL_20220420162016: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.000311754600346437, 'time_algorithm_update': 0.025768321159987423, 'temp_loss': -0.43671014752478626, 'temp': 0.9025623577031475, 'alpha_loss': 14.381652101438645, 'alpha': 0.5511179512355759, 'critic_loss': 1146.059849253872, 'actor_loss': 55.949035198367824, 'time_step': 0.02617814108642221, 'td_error': 239.2960817872962, 'init_value': -116.80888366699219, 'ave_value': -48.554323750047} step=7524
2022-04-20 16:23.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.45 [info     ] CQL_20220420162016: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003114848109016642, 'time_algorithm_update': 0.02555620809744673, 'temp_loss': -0.45782881549815513, 'temp': 0.9219651399997243, 'alpha_loss': 14.302622629187958, 'alpha': 0.5310394824945439, 'critic_loss': 1254.8800436097977, 'actor_loss': 61.30936460885388, 'time_step': 0.025963719128168118, 'td_error': 141.09091878944537, 'init_value': -126.64506530761719, 'ave_value': -51.908199211122756} step=7866
2022-04-20 16:23.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:23.55 [info     ] CQL_20220420162016: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00031728354113840917, 'time_algorithm_update': 0.025753334948891086, 'temp_loss': -0.4013230273205983, 'temp': 0.9405246858011213, 'alpha_loss': 12.905645728808397, 'alpha': 0.5139123447108687, 'critic_loss': 1358.8467432211714, 'actor_loss': 65.83389153954579, 'time_step': 0.026171325940137716, 'td_error': 216.02705810097987, 'init_value': -133.51126098632812, 'ave_value': -56.28636462211609} step=8208
2022-04-20 16:23.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.04 [info     ] CQL_20220420162016: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00031243221104493616, 'time_algorithm_update': 0.025558345499094467, 'temp_loss': -0.41733322512598064, 'temp': 0.958136594783493, 'alpha_loss': 13.181837176718908, 'alpha': 0.49735588314589, 'critic_loss': 1462.6127297920093, 'actor_loss': 70.88992318493581, 'time_step': 0.025970575405143158, 'td_error': 265.84575236364196, 'init_value': -144.80235290527344, 'ave_value': -61.932961771047864} step=8550
2022-04-20 16:24.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.13 [info     ] CQL_20220420162016: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003166861004299588, 'time_algorithm_update': 0.025661892361111112, 'temp_loss': -0.44898424094967676, 'temp': 0.9769820685972247, 'alpha_loss': 13.45827239159255, 'alpha': 0.4806997907614847, 'critic_loss': 1578.588283248812, 'actor_loss': 76.52457155818827, 'time_step': 0.026079737652114958, 'td_error': 260.1209864726284, 'init_value': -156.974365234375, 'ave_value': -67.22341984657554} step=8892
2022-04-20 16:24.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.23 [info     ] CQL_20220420162016: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003160280093812106, 'time_algorithm_update': 0.025616720405935545, 'temp_loss': -0.4766151864811429, 'temp': 0.9973798242577335, 'alpha_loss': 14.343805262916966, 'alpha': 0.46441207669283213, 'critic_loss': 1691.9538924010874, 'actor_loss': 82.07290595874451, 'time_step': 0.026032733638384188, 'td_error': 286.1124890289599, 'init_value': -167.72592163085938, 'ave_value': -72.89727439649471} step=9234
2022-04-20 16:24.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.32 [info     ] CQL_20220420162016: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00031557278326380323, 'time_algorithm_update': 0.02586989374885782, 'temp_loss': -0.41255573080907093, 'temp': 1.0158005663525989, 'alpha_loss': 12.22552986730609, 'alpha': 0.44902167743758153, 'critic_loss': 1809.7163389328628, 'actor_loss': 87.07971227099324, 'time_step': 0.02628418855499803, 'td_error': 279.9278417355614, 'init_value': -180.39486694335938, 'ave_value': -78.37325194912988} step=9576
2022-04-20 16:24.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.41 [info     ] CQL_20220420162016: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00031210874256334806, 'time_algorithm_update': 0.025830895580046358, 'temp_loss': -0.4967829523313987, 'temp': 1.0353212363538686, 'alpha_loss': 13.043912808100382, 'alpha': 0.435342055402304, 'critic_loss': 1916.533377664131, 'actor_loss': 92.57351938883464, 'time_step': 0.026243134548789578, 'td_error': 398.0159974423559, 'init_value': -193.1420135498047, 'ave_value': -85.3972552176424} step=9918
2022-04-20 16:24.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.51 [info     ] CQL_20220420162016: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000316525063319513, 'time_algorithm_update': 0.025683200847335726, 'temp_loss': -0.4360087044558853, 'temp': 1.056146986651839, 'alpha_loss': 11.533872133110002, 'alpha': 0.42168701479309484, 'critic_loss': 2038.5491661384092, 'actor_loss': 98.10586257845338, 'time_step': 0.02609732625080131, 'td_error': 322.87270678760234, 'init_value': -200.04513549804688, 'ave_value': -87.18214128815376} step=10260
2022-04-20 16:24.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:24.59 [info     ] CQL_20220420162016: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003124621876499109, 'time_algorithm_update': 0.02422676867211771, 'temp_loss': -0.40000668926741817, 'temp': 1.0750030380243447, 'alpha_loss': 11.799344847773948, 'alpha': 0.40857776293629094, 'critic_loss': 2160.5750978704086, 'actor_loss': 103.86417087755706, 'time_step': 0.024637468377051996, 'td_error': 301.89085586986744, 'init_value': -214.6680145263672, 'ave_value': -96.09702577729483} step=10602
2022-04-20 16:24.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.08 [info     ] CQL_20220420162016: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003124510335643389, 'time_algorithm_update': 0.02425460996683578, 'temp_loss': -0.384525369830996, 'temp': 1.0928171196876213, 'alpha_loss': 10.794003662310148, 'alpha': 0.3963392527654157, 'critic_loss': 2291.427220037806, 'actor_loss': 109.96619087353088, 'time_step': 0.024666744366026762, 'td_error': 278.2880249652737, 'init_value': -225.855712890625, 'ave_value': -98.32237921596648} step=10944
2022-04-20 16:25.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.17 [info     ] CQL_20220420162016: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003118312846847445, 'time_algorithm_update': 0.024370141196669193, 'temp_loss': -0.3220520553162746, 'temp': 1.1086894531696163, 'alpha_loss': 9.957038197601051, 'alpha': 0.38516803913646275, 'critic_loss': 2394.409123649374, 'actor_loss': 114.06436523080569, 'time_step': 0.02477956097028409, 'td_error': 376.635603295852, 'init_value': -233.98388671875, 'ave_value': -104.95407995116604} step=11286
2022-04-20 16:25.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.26 [info     ] CQL_20220420162016: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00031300316080015304, 'time_algorithm_update': 0.024396312864203202, 'temp_loss': -0.3913038835931591, 'temp': 1.1257065346366482, 'alpha_loss': 10.398789131153396, 'alpha': 0.37365551920313583, 'critic_loss': 2506.418499149077, 'actor_loss': 119.92012427703679, 'time_step': 0.02480760025002106, 'td_error': 381.8487729009417, 'init_value': -246.237060546875, 'ave_value': -110.68810333135966} step=11628
2022-04-20 16:25.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.35 [info     ] CQL_20220420162016: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003099225417912355, 'time_algorithm_update': 0.024264719751146104, 'temp_loss': -0.26576236836960787, 'temp': 1.1429609841073465, 'alpha_loss': 9.534741227389777, 'alpha': 0.36256511221852217, 'critic_loss': 2623.468163919728, 'actor_loss': 125.07237214651721, 'time_step': 0.024673544175443592, 'td_error': 334.0636166124285, 'init_value': -257.4759826660156, 'ave_value': -116.18212088642893} step=11970
2022-04-20 16:25.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.44 [info     ] CQL_20220420162016: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00030915918405990153, 'time_algorithm_update': 0.02408010638945284, 'temp_loss': -0.21962607200992734, 'temp': 1.154362836427856, 'alpha_loss': 8.88205805996008, 'alpha': 0.3523490369668481, 'critic_loss': 2738.842445775082, 'actor_loss': 130.2355365641633, 'time_step': 0.02448877047377023, 'td_error': 387.0755338062015, 'init_value': -263.71685791015625, 'ave_value': -116.7009938416932} step=12312
2022-04-20 16:25.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:25.52 [info     ] CQL_20220420162016: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00031719012567174365, 'time_algorithm_update': 0.024367563208641363, 'temp_loss': -0.1816254862044987, 'temp': 1.165732300769516, 'alpha_loss': 8.212571807772095, 'alpha': 0.3425875264301635, 'critic_loss': 2837.1157219423885, 'actor_loss': 134.71477885553014, 'time_step': 0.02478006220700448, 'td_error': 391.45569062990734, 'init_value': -271.58837890625, 'ave_value': -122.38444701784366} step=12654
2022-04-20 16:25.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.01 [info     ] CQL_20220420162016: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003105618103205809, 'time_algorithm_update': 0.02409524457496509, 'temp_loss': -0.1796610899356722, 'temp': 1.1766355508252193, 'alpha_loss': 8.162868764665392, 'alpha': 0.3328380746799603, 'critic_loss': 2930.5597609135143, 'actor_loss': 139.27513443796258, 'time_step': 0.024506234983254594, 'td_error': 410.1962100322751, 'init_value': -287.19207763671875, 'ave_value': -129.25967419927184} step=12996
2022-04-20 16:26.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.10 [info     ] CQL_20220420162016: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00031330222971955236, 'time_algorithm_update': 0.02439973577421311, 'temp_loss': -0.17249598896555735, 'temp': 1.1866015471213045, 'alpha_loss': 7.890601897100259, 'alpha': 0.3232589838100456, 'critic_loss': 3033.370719686586, 'actor_loss': 144.08948438767104, 'time_step': 0.024814756990176195, 'td_error': 393.25201197328397, 'init_value': -294.9908142089844, 'ave_value': -131.88124330317652} step=13338
2022-04-20 16:26.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.19 [info     ] CQL_20220420162016: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00031647277854339424, 'time_algorithm_update': 0.02430547050565307, 'temp_loss': -0.19868739785855277, 'temp': 1.196840444852037, 'alpha_loss': 7.921723687857912, 'alpha': 0.3137628862209487, 'critic_loss': 3132.018878066749, 'actor_loss': 148.77742046780057, 'time_step': 0.024719283594722637, 'td_error': 443.45246159507565, 'init_value': -298.3031005859375, 'ave_value': -133.32390482035842} step=13680
2022-04-20 16:26.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.28 [info     ] CQL_20220420162016: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00030602558314451697, 'time_algorithm_update': 0.024253815935369124, 'temp_loss': -0.19817325115552423, 'temp': 1.2077390523681863, 'alpha_loss': 7.877044929398431, 'alpha': 0.30423665543397266, 'critic_loss': 3225.4281469869334, 'actor_loss': 153.06989593952022, 'time_step': 0.024657642632200008, 'td_error': 540.9622570375094, 'init_value': -316.7300720214844, 'ave_value': -142.92951784433546} step=14022
2022-04-20 16:26.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.37 [info     ] CQL_20220420162016: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00031432422281008716, 'time_algorithm_update': 0.024234715958087766, 'temp_loss': -0.14571797129921396, 'temp': 1.2196393490534776, 'alpha_loss': 7.29626316773264, 'alpha': 0.29499748279476723, 'critic_loss': 3327.8300038834063, 'actor_loss': 157.94880263568365, 'time_step': 0.024652989287125438, 'td_error': 492.3307419450502, 'init_value': -318.3974609375, 'ave_value': -143.3721409078654} step=14364
2022-04-20 16:26.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.45 [info     ] CQL_20220420162016: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00031871265835232206, 'time_algorithm_update': 0.024326392781664753, 'temp_loss': -0.06449717683009586, 'temp': 1.226541528924864, 'alpha_loss': 7.22728381170864, 'alpha': 0.28616442317851104, 'critic_loss': 3426.0153872841283, 'actor_loss': 162.4371950919168, 'time_step': 0.024742473635757177, 'td_error': 509.8967893464904, 'init_value': -327.21661376953125, 'ave_value': -147.75030958309904} step=14706
2022-04-20 16:26.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:26.54 [info     ] CQL_20220420162016: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003138174090469093, 'time_algorithm_update': 0.024480510873404162, 'temp_loss': -0.11005413607416446, 'temp': 1.2323718321950812, 'alpha_loss': 7.187715785545215, 'alpha': 0.27749430444854045, 'critic_loss': 3518.7280323407804, 'actor_loss': 166.80719185990895, 'time_step': 0.024890569915548402, 'td_error': 520.7830394697146, 'init_value': -338.73516845703125, 'ave_value': -154.28664190273028} step=15048
2022-04-20 16:26.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.03 [info     ] CQL_20220420162016: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00031161378001609043, 'time_algorithm_update': 0.024298592617637234, 'temp_loss': -0.05770728280713335, 'temp': 1.237612928214826, 'alpha_loss': 7.093052592193871, 'alpha': 0.2689252265712671, 'critic_loss': 3616.7805889642727, 'actor_loss': 171.43520399840952, 'time_step': 0.024707794886583475, 'td_error': 611.1220552121125, 'init_value': -352.7435607910156, 'ave_value': -162.09603945314348} step=15390
2022-04-20 16:27.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.12 [info     ] CQL_20220420162016: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003145612471284922, 'time_algorithm_update': 0.024196847140440465, 'temp_loss': -0.04396029268745442, 'temp': 1.2414426036745485, 'alpha_loss': 6.5644053464744525, 'alpha': 0.26068311999415794, 'critic_loss': 3721.8609283625733, 'actor_loss': 176.25857039780644, 'time_step': 0.02460706094552202, 'td_error': 595.6729900338358, 'init_value': -355.3885498046875, 'ave_value': -160.4432874283812} step=15732
2022-04-20 16:27.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.21 [info     ] CQL_20220420162016: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003134033136200487, 'time_algorithm_update': 0.024498610468635783, 'temp_loss': 0.03908568316776501, 'temp': 1.2404315827185648, 'alpha_loss': 6.450197088090997, 'alpha': 0.2529626757516498, 'critic_loss': 3817.4090397706505, 'actor_loss': 180.5405779832985, 'time_step': 0.024910864077116315, 'td_error': 622.8988372385029, 'init_value': -365.76055908203125, 'ave_value': -165.80129522857365} step=16074
2022-04-20 16:27.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.30 [info     ] CQL_20220420162016: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003147940886648078, 'time_algorithm_update': 0.025544422411779216, 'temp_loss': -0.03211206467876657, 'temp': 1.241015769236269, 'alpha_loss': 6.310481098660252, 'alpha': 0.24530377696480668, 'critic_loss': 3913.237633777641, 'actor_loss': 185.1905285573145, 'time_step': 0.025958883832072652, 'td_error': 671.2408060852794, 'init_value': -373.9687805175781, 'ave_value': -170.60514712618277} step=16416
2022-04-20 16:27.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.40 [info     ] CQL_20220420162016: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032058863611946327, 'time_algorithm_update': 0.02579029679995531, 'temp_loss': -0.00907629723960196, 'temp': 1.2422787215974596, 'alpha_loss': 6.25849934209857, 'alpha': 0.2377650122544919, 'critic_loss': 4004.981693736294, 'actor_loss': 189.2061500772398, 'time_step': 0.026210609932391965, 'td_error': 669.6047319633919, 'init_value': -386.7667541503906, 'ave_value': -174.9874869514693} step=16758
2022-04-20 16:27.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:27.49 [info     ] CQL_20220420162016: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003153657355503729, 'time_algorithm_update': 0.02553921693946883, 'temp_loss': -0.025917219286730065, 'temp': 1.2432503787397642, 'alpha_loss': 5.978214518368593, 'alpha': 0.23033336388785935, 'critic_loss': 4103.1601569638615, 'actor_loss': 193.9011504859255, 'time_step': 0.025951007653398122, 'td_error': 693.592948265363, 'init_value': -397.375244140625, 'ave_value': -181.90429968904806} step=17100
2022-04-20 16:27.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162016/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:27.50 [info     ] FQE_20220420162749: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015813078583970581, 'time_algorithm_update': 0.004602020069704217, 'loss': 0.008113336736220594, 'time_step': 0.004833486794078417, 'init_value': -0.5574992299079895, 'ave_value': -0.5234439714534863, 'soft_opc': nan} step=177




2022-04-20 16:27.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.51 [info     ] FQE_20220420162749: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.000164410488753669, 'time_algorithm_update': 0.0044905762214445125, 'loss': 0.006353459839240811, 'time_step': 0.004728996147543697, 'init_value': -0.6673142313957214, 'ave_value': -0.5845378581557545, 'soft_opc': nan} step=354




2022-04-20 16:27.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.52 [info     ] FQE_20220420162749: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015484276464429952, 'time_algorithm_update': 0.0037903381606279795, 'loss': 0.005776767646578156, 'time_step': 0.004021649980275644, 'init_value': -0.7216536402702332, 'ave_value': -0.5900046211284202, 'soft_opc': nan} step=531




2022-04-20 16:27.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.53 [info     ] FQE_20220420162749: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016174208646440235, 'time_algorithm_update': 0.004524625627334508, 'loss': 0.005705382473672093, 'time_step': 0.004761135510805636, 'init_value': -0.8212983012199402, 'ave_value': -0.6634144607159469, 'soft_opc': nan} step=708




2022-04-20 16:27.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.53 [info     ] FQE_20220420162749: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016642963818911104, 'time_algorithm_update': 0.004464618230270127, 'loss': 0.005428536668798681, 'time_step': 0.004699600618437859, 'init_value': -0.8748037219047546, 'ave_value': -0.6892159345987681, 'soft_opc': nan} step=885




2022-04-20 16:27.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.54 [info     ] FQE_20220420162749: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00015199655866892324, 'time_algorithm_update': 0.003650196528030654, 'loss': 0.005213911303103307, 'time_step': 0.0038710906680694407, 'init_value': -0.9027080535888672, 'ave_value': -0.6949958160832838, 'soft_opc': nan} step=1062




2022-04-20 16:27.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.55 [info     ] FQE_20220420162749: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00016029675801595053, 'time_algorithm_update': 0.004474160361424678, 'loss': 0.004941591205033289, 'time_step': 0.0047110985901396155, 'init_value': -0.9715356826782227, 'ave_value': -0.7276905118166147, 'soft_opc': nan} step=1239




2022-04-20 16:27.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.56 [info     ] FQE_20220420162749: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015704106476347325, 'time_algorithm_update': 0.004506899138628426, 'loss': 0.004812115507389782, 'time_step': 0.004731342617401295, 'init_value': -1.022133469581604, 'ave_value': -0.7403136233220229, 'soft_opc': nan} step=1416




2022-04-20 16:27.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.57 [info     ] FQE_20220420162749: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00015663831247448247, 'time_algorithm_update': 0.004263788966809289, 'loss': 0.004587167405028759, 'time_step': 0.004489766675873664, 'init_value': -1.0292927026748657, 'ave_value': -0.7341328345351033, 'soft_opc': nan} step=1593




2022-04-20 16:27.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.58 [info     ] FQE_20220420162749: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001584230843236891, 'time_algorithm_update': 0.003959265132408358, 'loss': 0.00468668734423269, 'time_step': 0.004188507963708565, 'init_value': -1.0942552089691162, 'ave_value': -0.7749105290965633, 'soft_opc': nan} step=1770




2022-04-20 16:27.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.59 [info     ] FQE_20220420162749: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015535058274780962, 'time_algorithm_update': 0.004546952112919867, 'loss': 0.004663967369597847, 'time_step': 0.004772714302364716, 'init_value': -1.1411402225494385, 'ave_value': -0.785890347705231, 'soft_opc': nan} step=1947




2022-04-20 16:27.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:27.59 [info     ] FQE_20220420162749: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015922050691593838, 'time_algorithm_update': 0.004461005582647808, 'loss': 0.00467214085251135, 'time_step': 0.004693386918407375, 'init_value': -1.1919516324996948, 'ave_value': -0.8249075917689291, 'soft_opc': nan} step=2124




2022-04-20 16:28.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.00 [info     ] FQE_20220420162749: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016009066737977798, 'time_algorithm_update': 0.003597297237417792, 'loss': 0.004519723782626291, 'time_step': 0.0038287073878918664, 'init_value': -1.2061799764633179, 'ave_value': -0.8180877484940552, 'soft_opc': nan} step=2301




2022-04-20 16:28.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.01 [info     ] FQE_20220420162749: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016450073759434586, 'time_algorithm_update': 0.0045496528431520624, 'loss': 0.004749343008339658, 'time_step': 0.004787331920559123, 'init_value': -1.2592352628707886, 'ave_value': -0.8491533482128435, 'soft_opc': nan} step=2478




2022-04-20 16:28.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.02 [info     ] FQE_20220420162749: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016105646467478262, 'time_algorithm_update': 0.00452191008012847, 'loss': 0.004739621650864302, 'time_step': 0.0047534886053053, 'init_value': -1.332291603088379, 'ave_value': -0.8980906005915221, 'soft_opc': nan} step=2655




2022-04-20 16:28.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.03 [info     ] FQE_20220420162749: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.0001605675045379811, 'time_algorithm_update': 0.0039758237741761284, 'loss': 0.00502883917770459, 'time_step': 0.00420713289982855, 'init_value': -1.357259750366211, 'ave_value': -0.9032569298358472, 'soft_opc': nan} step=2832




2022-04-20 16:28.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.04 [info     ] FQE_20220420162749: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00016324533581060205, 'time_algorithm_update': 0.004313757190596586, 'loss': 0.0051476126056595965, 'time_step': 0.0045533853735627424, 'init_value': -1.4075042009353638, 'ave_value': -0.9525276912896483, 'soft_opc': nan} step=3009




2022-04-20 16:28.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.05 [info     ] FQE_20220420162749: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015875713973395567, 'time_algorithm_update': 0.004532046237234342, 'loss': 0.005282483076792484, 'time_step': 0.004765029680930962, 'init_value': -1.3937870264053345, 'ave_value': -0.9116697406826971, 'soft_opc': nan} step=3186




2022-04-20 16:28.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.06 [info     ] FQE_20220420162749: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00017065516972945908, 'time_algorithm_update': 0.004492484917074947, 'loss': 0.005610276503609441, 'time_step': 0.004735574883929753, 'init_value': -1.4443858861923218, 'ave_value': -0.9528966844943282, 'soft_opc': nan} step=3363




2022-04-20 16:28.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.06 [info     ] FQE_20220420162749: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001555647553697144, 'time_algorithm_update': 0.0037682420116359906, 'loss': 0.005814263628043319, 'time_step': 0.003994609003013136, 'init_value': -1.4875892400741577, 'ave_value': -0.9847530118360355, 'soft_opc': nan} step=3540




2022-04-20 16:28.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.07 [info     ] FQE_20220420162749: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016232129544188075, 'time_algorithm_update': 0.004501410123318602, 'loss': 0.0061767255146803575, 'time_step': 0.004734699335475426, 'init_value': -1.5651293992996216, 'ave_value': -1.0343726971933433, 'soft_opc': nan} step=3717




2022-04-20 16:28.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.08 [info     ] FQE_20220420162749: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00015756774083369195, 'time_algorithm_update': 0.004563704722345212, 'loss': 0.00637081392980207, 'time_step': 0.00479499229603568, 'init_value': -1.5824484825134277, 'ave_value': -1.025780616340426, 'soft_opc': nan} step=3894




2022-04-20 16:28.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.09 [info     ] FQE_20220420162749: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00014967433476852158, 'time_algorithm_update': 0.0035122047036381093, 'loss': 0.006608536220313508, 'time_step': 0.0037294067231948766, 'init_value': -1.633227825164795, 'ave_value': -1.0917098666342708, 'soft_opc': nan} step=4071




2022-04-20 16:28.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.10 [info     ] FQE_20220420162749: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001421715580137436, 'time_algorithm_update': 0.0041524016924497096, 'loss': 0.00692888229722315, 'time_step': 0.004364595574847722, 'init_value': -1.6604342460632324, 'ave_value': -1.110810296118886, 'soft_opc': nan} step=4248




2022-04-20 16:28.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.11 [info     ] FQE_20220420162749: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.0001451066658321747, 'time_algorithm_update': 0.0041660333083847825, 'loss': 0.007024831722600986, 'time_step': 0.0043754671926552295, 'init_value': -1.7100472450256348, 'ave_value': -1.1527387856810658, 'soft_opc': nan} step=4425




2022-04-20 16:28.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.11 [info     ] FQE_20220420162749: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001444574129783501, 'time_algorithm_update': 0.0038572165925624007, 'loss': 0.007651415379917891, 'time_step': 0.004068503945560778, 'init_value': -1.8469120264053345, 'ave_value': -1.2913594014786027, 'soft_opc': nan} step=4602




2022-04-20 16:28.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.12 [info     ] FQE_20220420162749: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00013677279154459635, 'time_algorithm_update': 0.0036215378066240733, 'loss': 0.008147711165138852, 'time_step': 0.003819717525762353, 'init_value': -1.8802142143249512, 'ave_value': -1.316289472226445, 'soft_opc': nan} step=4779




2022-04-20 16:28.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.13 [info     ] FQE_20220420162749: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00014103873301360567, 'time_algorithm_update': 0.004211648035857637, 'loss': 0.008902912994972983, 'time_step': 0.004417884147773355, 'init_value': -1.8503535985946655, 'ave_value': -1.2865751183963752, 'soft_opc': nan} step=4956




2022-04-20 16:28.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.14 [info     ] FQE_20220420162749: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00014171223182462704, 'time_algorithm_update': 0.004265359566036591, 'loss': 0.009098330437005787, 'time_step': 0.004470844053279209, 'init_value': -1.9170336723327637, 'ave_value': -1.3118220131721225, 'soft_opc': nan} step=5133




2022-04-20 16:28.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.14 [info     ] FQE_20220420162749: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00014618022293694276, 'time_algorithm_update': 0.0035078175323831158, 'loss': 0.009213623720336978, 'time_step': 0.0037224993193890415, 'init_value': -1.9866541624069214, 'ave_value': -1.3683813924903985, 'soft_opc': nan} step=5310




2022-04-20 16:28.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.15 [info     ] FQE_20220420162749: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016181482433599267, 'time_algorithm_update': 0.0045651338868222, 'loss': 0.009386910023009879, 'time_step': 0.004799221868568895, 'init_value': -2.007781744003296, 'ave_value': -1.382470481862893, 'soft_opc': nan} step=5487




2022-04-20 16:28.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.16 [info     ] FQE_20220420162749: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00015782905837236826, 'time_algorithm_update': 0.004591953956474692, 'loss': 0.010329603908141539, 'time_step': 0.004821169847822459, 'init_value': -2.064000129699707, 'ave_value': -1.448458844134668, 'soft_opc': nan} step=5664




2022-04-20 16:28.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.17 [info     ] FQE_20220420162749: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001569104059941351, 'time_algorithm_update': 0.003745242027239611, 'loss': 0.010661701039058592, 'time_step': 0.003975815692190397, 'init_value': -2.156085252761841, 'ave_value': -1.5375517392816307, 'soft_opc': nan} step=5841




2022-04-20 16:28.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.18 [info     ] FQE_20220420162749: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.000158798896660239, 'time_algorithm_update': 0.004585376567086258, 'loss': 0.010504073057570994, 'time_step': 0.004817928971543824, 'init_value': -2.1524460315704346, 'ave_value': -1.5325197853763273, 'soft_opc': nan} step=6018




2022-04-20 16:28.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.19 [info     ] FQE_20220420162749: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001634864483849477, 'time_algorithm_update': 0.00451350885595979, 'loss': 0.010895804633612687, 'time_step': 0.00475009282430013, 'init_value': -2.2111480236053467, 'ave_value': -1.5913282254996064, 'soft_opc': nan} step=6195




2022-04-20 16:28.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.20 [info     ] FQE_20220420162749: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015862648096461753, 'time_algorithm_update': 0.004323198296929483, 'loss': 0.011972117743528538, 'time_step': 0.004553639956113309, 'init_value': -2.209935426712036, 'ave_value': -1.5770244521197971, 'soft_opc': nan} step=6372




2022-04-20 16:28.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.21 [info     ] FQE_20220420162749: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016349048937781382, 'time_algorithm_update': 0.003982593784224516, 'loss': 0.01209296164560621, 'time_step': 0.004218506947749078, 'init_value': -2.269913911819458, 'ave_value': -1.6141926731936656, 'soft_opc': nan} step=6549




2022-04-20 16:28.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.21 [info     ] FQE_20220420162749: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016059444449042197, 'time_algorithm_update': 0.004522062290859761, 'loss': 0.012808033420393864, 'time_step': 0.004755641107505324, 'init_value': -2.324958086013794, 'ave_value': -1.6829613247231858, 'soft_opc': nan} step=6726




2022-04-20 16:28.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.22 [info     ] FQE_20220420162749: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00014913822971494858, 'time_algorithm_update': 0.00429970127041057, 'loss': 0.013283797190524638, 'time_step': 0.004515462002511752, 'init_value': -2.3977062702178955, 'ave_value': -1.73523693788159, 'soft_opc': nan} step=6903




2022-04-20 16:28.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.23 [info     ] FQE_20220420162749: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00014106028497555835, 'time_algorithm_update': 0.003410686880855237, 'loss': 0.013951651603351795, 'time_step': 0.003614241120505468, 'init_value': -2.4167325496673584, 'ave_value': -1.7381843868833224, 'soft_opc': nan} step=7080




2022-04-20 16:28.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.24 [info     ] FQE_20220420162749: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00014724839205122265, 'time_algorithm_update': 0.004233723979885295, 'loss': 0.014648612054580517, 'time_step': 0.004446800145725746, 'init_value': -2.487447500228882, 'ave_value': -1.821128379365614, 'soft_opc': nan} step=7257




2022-04-20 16:28.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.25 [info     ] FQE_20220420162749: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00014267129413152144, 'time_algorithm_update': 0.004210776528396175, 'loss': 0.014885601796746507, 'time_step': 0.00442000432203045, 'init_value': -2.535395383834839, 'ave_value': -1.8313911282689543, 'soft_opc': nan} step=7434




2022-04-20 16:28.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.25 [info     ] FQE_20220420162749: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00013545542787023857, 'time_algorithm_update': 0.003636031501037253, 'loss': 0.015271181819465874, 'time_step': 0.0038351931814420022, 'init_value': -2.623544454574585, 'ave_value': -1.9209614099246692, 'soft_opc': nan} step=7611




2022-04-20 16:28.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.26 [info     ] FQE_20220420162749: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00013470380319713872, 'time_algorithm_update': 0.0038966620709263, 'loss': 0.01593583000108263, 'time_step': 0.004095247236348815, 'init_value': -2.608381986618042, 'ave_value': -1.8739645814092398, 'soft_opc': nan} step=7788




2022-04-20 16:28.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.27 [info     ] FQE_20220420162749: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00014526561155157574, 'time_algorithm_update': 0.004223352098195566, 'loss': 0.01723377357729011, 'time_step': 0.004435287357050147, 'init_value': -2.694314956665039, 'ave_value': -1.956957497322434, 'soft_opc': nan} step=7965




2022-04-20 16:28.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.28 [info     ] FQE_20220420162749: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.0001403180892858128, 'time_algorithm_update': 0.004075016679063355, 'loss': 0.017941281477668148, 'time_step': 0.004276743042940474, 'init_value': -2.75457763671875, 'ave_value': -1.9847006001876102, 'soft_opc': nan} step=8142




2022-04-20 16:28.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.29 [info     ] FQE_20220420162749: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015625711214744439, 'time_algorithm_update': 0.0038215521365235756, 'loss': 0.0186232923704994, 'time_step': 0.004050904074631169, 'init_value': -2.870695114135742, 'ave_value': -2.0996854538506304, 'soft_opc': nan} step=8319




2022-04-20 16:28.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.30 [info     ] FQE_20220420162749: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00015889857448427017, 'time_algorithm_update': 0.004540081078049827, 'loss': 0.01930816708419388, 'time_step': 0.004772952720943817, 'init_value': -2.901344060897827, 'ave_value': -2.1095384903453493, 'soft_opc': nan} step=8496




2022-04-20 16:28.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.30 [info     ] FQE_20220420162749: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00016465833631612486, 'time_algorithm_update': 0.004538281489226778, 'loss': 0.020476793925577805, 'time_step': 0.004776418545825333, 'init_value': -2.991591453552246, 'ave_value': -2.1786728796501595, 'soft_opc': nan} step=8673




2022-04-20 16:28.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 16:28.31 [info     ] FQE_20220420162749: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00015185242992336466, 'time_algorithm_update': 0.003560910790653552, 'loss': 0.021926306171663693, 'time_step': 0.0037850148260256664, 'init_value': -3.023946523666382, 'ave_value': -2.207777668251558, 'soft_opc': nan} step=8850




2022-04-20 16:28.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162749/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:28.31 [info     ] Directory is created at d3rlpy_logs/FQE_20220420162831
2022-04-20 16:28.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:28.31 [debug    ] Building models...
2022-04-20 16:28.31 [debug    ] Models have been built.
2022-04-20 16:28.31 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420162831/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:28.33 [info     ] FQE_20220420162831: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016496486442033634, 'time_algorithm_update': 0.004509270884269892, 'loss': 0.0241899378814323, 'time_step': 0.004752402388772299, 'init_value': -1.0600104331970215, 'ave_value': -1.0872200342180494, 'soft_opc': nan} step=344




2022-04-20 16:28.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.35 [info     ] FQE_20220420162831: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017920968144438987, 'time_algorithm_update': 0.004866060129431791, 'loss': 0.02091676123483583, 'time_step': 0.005119065212648969, 'init_value': -1.646593451499939, 'ave_value': -1.711840994575539, 'soft_opc': nan} step=688




2022-04-20 16:28.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.37 [info     ] FQE_20220420162831: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016629280045975085, 'time_algorithm_update': 0.004133154486500939, 'loss': 0.024862038131348442, 'time_step': 0.004373190014861351, 'init_value': -2.4524521827697754, 'ave_value': -2.615944682397284, 'soft_opc': nan} step=1032




2022-04-20 16:28.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.39 [info     ] FQE_20220420162831: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016432792641395746, 'time_algorithm_update': 0.004555983598842178, 'loss': 0.026505223521421296, 'time_step': 0.0047915744227032325, 'init_value': -2.9147491455078125, 'ave_value': -3.1712794704480216, 'soft_opc': nan} step=1376




2022-04-20 16:28.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.40 [info     ] FQE_20220420162831: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016013827434805937, 'time_algorithm_update': 0.004080597744431607, 'loss': 0.03278747579992511, 'time_step': 0.0043148883553438405, 'init_value': -3.5302486419677734, 'ave_value': -3.928790463238686, 'soft_opc': nan} step=1720




2022-04-20 16:28.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.42 [info     ] FQE_20220420162831: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001662290373513865, 'time_algorithm_update': 0.004520080117292182, 'loss': 0.03824607146968848, 'time_step': 0.004762588545333507, 'init_value': -3.9331307411193848, 'ave_value': -4.421720963380896, 'soft_opc': nan} step=2064




2022-04-20 16:28.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.44 [info     ] FQE_20220420162831: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016421010327893635, 'time_algorithm_update': 0.004093161156011182, 'loss': 0.046932696535381985, 'time_step': 0.004332313010858935, 'init_value': -4.733679294586182, 'ave_value': -5.314038217389906, 'soft_opc': nan} step=2408




2022-04-20 16:28.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.45 [info     ] FQE_20220420162831: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016449495803478153, 'time_algorithm_update': 0.00450430013412653, 'loss': 0.05656186460387395, 'time_step': 0.004745046759760657, 'init_value': -5.159862518310547, 'ave_value': -5.841897247020189, 'soft_opc': nan} step=2752




2022-04-20 16:28.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.47 [info     ] FQE_20220420162831: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016400911087213562, 'time_algorithm_update': 0.004217117331748785, 'loss': 0.06597215235287454, 'time_step': 0.0044540658939716425, 'init_value': -5.636109352111816, 'ave_value': -6.392252972695204, 'soft_opc': nan} step=3096




2022-04-20 16:28.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.49 [info     ] FQE_20220420162831: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001630519711694052, 'time_algorithm_update': 0.004069376130436742, 'loss': 0.0784806260801202, 'time_step': 0.004308286101319069, 'init_value': -6.254384994506836, 'ave_value': -7.103496205833581, 'soft_opc': nan} step=3440




2022-04-20 16:28.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.50 [info     ] FQE_20220420162831: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001639141592868539, 'time_algorithm_update': 0.00453253194343212, 'loss': 0.08879386728120492, 'time_step': 0.004772895990416061, 'init_value': -6.66309928894043, 'ave_value': -7.585982901045868, 'soft_opc': nan} step=3784




2022-04-20 16:28.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.52 [info     ] FQE_20220420162831: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016184740288313046, 'time_algorithm_update': 0.004123215065446011, 'loss': 0.10364848684227138, 'time_step': 0.004359602235084356, 'init_value': -7.36868143081665, 'ave_value': -8.360611294545569, 'soft_opc': nan} step=4128




2022-04-20 16:28.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.54 [info     ] FQE_20220420162831: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016374782074329465, 'time_algorithm_update': 0.004556923411613287, 'loss': 0.11112743282560693, 'time_step': 0.0047939232615537425, 'init_value': -7.803360939025879, 'ave_value': -8.805130547338777, 'soft_opc': nan} step=4472




2022-04-20 16:28.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.55 [info     ] FQE_20220420162831: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001613726449567218, 'time_algorithm_update': 0.004097197638001553, 'loss': 0.12208385471806883, 'time_step': 0.004332367763962857, 'init_value': -8.261545181274414, 'ave_value': -9.43391451841002, 'soft_opc': nan} step=4816




2022-04-20 16:28.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.57 [info     ] FQE_20220420162831: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001631282096685365, 'time_algorithm_update': 0.00438468054283497, 'loss': 0.1319453952980206, 'time_step': 0.004621171674063039, 'init_value': -8.51439094543457, 'ave_value': -9.769931094388705, 'soft_opc': nan} step=5160




2022-04-20 16:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:28.59 [info     ] FQE_20220420162831: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016105105710584065, 'time_algorithm_update': 0.004303182973418125, 'loss': 0.14348747842230422, 'time_step': 0.004535499007202858, 'init_value': -8.920228004455566, 'ave_value': -10.28009434626983, 'soft_opc': nan} step=5504




2022-04-20 16:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.00 [info     ] FQE_20220420162831: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001627622648727062, 'time_algorithm_update': 0.004093014916708303, 'loss': 0.15161211241773048, 'time_step': 0.004329316837843074, 'init_value': -9.202096939086914, 'ave_value': -10.706619638043481, 'soft_opc': nan} step=5848




2022-04-20 16:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.02 [info     ] FQE_20220420162831: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001601133235665255, 'time_algorithm_update': 0.004544012768324031, 'loss': 0.16419106806617575, 'time_step': 0.004776250484377839, 'init_value': -9.557777404785156, 'ave_value': -11.137908222814938, 'soft_opc': nan} step=6192




2022-04-20 16:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.04 [info     ] FQE_20220420162831: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016329316205756608, 'time_algorithm_update': 0.004073093103808026, 'loss': 0.17629761361993504, 'time_step': 0.0043098344359287, 'init_value': -9.919151306152344, 'ave_value': -11.659927439045262, 'soft_opc': nan} step=6536




2022-04-20 16:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.05 [info     ] FQE_20220420162831: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016327583512594534, 'time_algorithm_update': 0.004493424365686816, 'loss': 0.19090976170264184, 'time_step': 0.00472906093264735, 'init_value': -10.296332359313965, 'ave_value': -12.157307394721487, 'soft_opc': nan} step=6880




2022-04-20 16:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.07 [info     ] FQE_20220420162831: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001661694327066111, 'time_algorithm_update': 0.004033903049868207, 'loss': 0.2037239852009428, 'time_step': 0.004278059615645298, 'init_value': -10.634944915771484, 'ave_value': -12.70192547362667, 'soft_opc': nan} step=7224




2022-04-20 16:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.09 [info     ] FQE_20220420162831: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001640943593757097, 'time_algorithm_update': 0.004413058591443439, 'loss': 0.21795616974122822, 'time_step': 0.004654288985008417, 'init_value': -10.839553833007812, 'ave_value': -13.023083844888317, 'soft_opc': nan} step=7568




2022-04-20 16:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.10 [info     ] FQE_20220420162831: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016127145567605662, 'time_algorithm_update': 0.004429387491802836, 'loss': 0.235004379170362, 'time_step': 0.004664345536121103, 'init_value': -11.216604232788086, 'ave_value': -13.496112954133265, 'soft_opc': nan} step=7912




2022-04-20 16:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.12 [info     ] FQE_20220420162831: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016524278840353323, 'time_algorithm_update': 0.004057056682054387, 'loss': 0.25223247073915633, 'time_step': 0.004298385492590971, 'init_value': -11.483257293701172, 'ave_value': -13.809652149274543, 'soft_opc': nan} step=8256




2022-04-20 16:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.14 [info     ] FQE_20220420162831: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016681676687196244, 'time_algorithm_update': 0.004558423923891644, 'loss': 0.26786902635141685, 'time_step': 0.004800883836524431, 'init_value': -11.725919723510742, 'ave_value': -14.160340736846667, 'soft_opc': nan} step=8600




2022-04-20 16:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.15 [info     ] FQE_20220420162831: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001616713612578636, 'time_algorithm_update': 0.004030537466670192, 'loss': 0.28859583715648324, 'time_step': 0.004268621982530106, 'init_value': -12.235058784484863, 'ave_value': -14.80886692963205, 'soft_opc': nan} step=8944




2022-04-20 16:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.17 [info     ] FQE_20220420162831: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016452822574349336, 'time_algorithm_update': 0.0045017704021099005, 'loss': 0.3176742957926594, 'time_step': 0.004744154769320821, 'init_value': -12.260236740112305, 'ave_value': -14.947478281753558, 'soft_opc': nan} step=9288




2022-04-20 16:29.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.19 [info     ] FQE_20220420162831: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001601160958755848, 'time_algorithm_update': 0.004082850938619569, 'loss': 0.34306581587487356, 'time_step': 0.004319119592045628, 'init_value': -12.696601867675781, 'ave_value': -15.44339207341005, 'soft_opc': nan} step=9632




2022-04-20 16:29.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.21 [info     ] FQE_20220420162831: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.000160617190738057, 'time_algorithm_update': 0.0043382062468417856, 'loss': 0.364128683661219, 'time_step': 0.0045744062856186265, 'init_value': -12.884371757507324, 'ave_value': -15.85774452782966, 'soft_opc': nan} step=9976




2022-04-20 16:29.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.22 [info     ] FQE_20220420162831: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016388920850531999, 'time_algorithm_update': 0.0044177340906719825, 'loss': 0.38767009224677673, 'time_step': 0.004661054805267689, 'init_value': -13.217522621154785, 'ave_value': -16.29507393058356, 'soft_opc': nan} step=10320




2022-04-20 16:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.24 [info     ] FQE_20220420162831: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016229998233706453, 'time_algorithm_update': 0.004138360189837079, 'loss': 0.41185464513986264, 'time_step': 0.00437394893446634, 'init_value': -13.55303955078125, 'ave_value': -16.748358453716243, 'soft_opc': nan} step=10664




2022-04-20 16:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.26 [info     ] FQE_20220420162831: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016463080117868823, 'time_algorithm_update': 0.004558195901471515, 'loss': 0.44259593748422554, 'time_step': 0.004798922427864962, 'init_value': -13.959558486938477, 'ave_value': -17.12630558873082, 'soft_opc': nan} step=11008




2022-04-20 16:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.27 [info     ] FQE_20220420162831: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016225285308305606, 'time_algorithm_update': 0.00409233292867971, 'loss': 0.47715864110153255, 'time_step': 0.004328137220338334, 'init_value': -13.966209411621094, 'ave_value': -17.249747280226096, 'soft_opc': nan} step=11352




2022-04-20 16:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.29 [info     ] FQE_20220420162831: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.000163097714268884, 'time_algorithm_update': 0.0045718010081801305, 'loss': 0.4873353138425236, 'time_step': 0.004811526037925898, 'init_value': -13.849008560180664, 'ave_value': -17.20204710713378, 'soft_opc': nan} step=11696




2022-04-20 16:29.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.31 [info     ] FQE_20220420162831: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016245245933532715, 'time_algorithm_update': 0.004094884839168814, 'loss': 0.5144948226042352, 'time_step': 0.0043309226978656854, 'init_value': -14.193527221679688, 'ave_value': -17.571880117193, 'soft_opc': nan} step=12040




2022-04-20 16:29.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.32 [info     ] FQE_20220420162831: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016267355098280797, 'time_algorithm_update': 0.004277269507563392, 'loss': 0.5415965537486468, 'time_step': 0.004517042359640432, 'init_value': -14.524923324584961, 'ave_value': -17.880456039927026, 'soft_opc': nan} step=12384




2022-04-20 16:29.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.34 [info     ] FQE_20220420162831: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016210037608479344, 'time_algorithm_update': 0.004524994035099828, 'loss': 0.5635851884888875, 'time_step': 0.004762774983117747, 'init_value': -15.079235076904297, 'ave_value': -18.369895697660276, 'soft_opc': nan} step=12728




2022-04-20 16:29.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.36 [info     ] FQE_20220420162831: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001648352589718131, 'time_algorithm_update': 0.004082412220710932, 'loss': 0.5780225546305003, 'time_step': 0.004320672085118848, 'init_value': -14.62356185913086, 'ave_value': -17.984246940763146, 'soft_opc': nan} step=13072




2022-04-20 16:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.38 [info     ] FQE_20220420162831: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016071352847786836, 'time_algorithm_update': 0.0045187826766524206, 'loss': 0.5862956362462408, 'time_step': 0.004754305578941523, 'init_value': -15.242842674255371, 'ave_value': -18.566627963598783, 'soft_opc': nan} step=13416




2022-04-20 16:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.39 [info     ] FQE_20220420162831: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016240325084952421, 'time_algorithm_update': 0.0041244230991186095, 'loss': 0.5987967700253479, 'time_step': 0.004360407590866089, 'init_value': -15.202902793884277, 'ave_value': -18.50327104880466, 'soft_opc': nan} step=13760




2022-04-20 16:29.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.41 [info     ] FQE_20220420162831: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016397722931795343, 'time_algorithm_update': 0.004560203746307728, 'loss': 0.6082734344093952, 'time_step': 0.004798184300577918, 'init_value': -15.559212684631348, 'ave_value': -18.879149562254682, 'soft_opc': nan} step=14104




2022-04-20 16:29.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.43 [info     ] FQE_20220420162831: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016561081243115803, 'time_algorithm_update': 0.004204104113024335, 'loss': 0.6273508433490819, 'time_step': 0.004445724016012147, 'init_value': -15.554584503173828, 'ave_value': -18.95477915690826, 'soft_opc': nan} step=14448




2022-04-20 16:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.44 [info     ] FQE_20220420162831: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016421911328337913, 'time_algorithm_update': 0.004233735938404881, 'loss': 0.636261725186392, 'time_step': 0.004474073648452759, 'init_value': -15.777822494506836, 'ave_value': -19.260993286859883, 'soft_opc': nan} step=14792




2022-04-20 16:29.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.46 [info     ] FQE_20220420162831: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001656447732171347, 'time_algorithm_update': 0.004599077757014785, 'loss': 0.6416003090968398, 'time_step': 0.004842574513235757, 'init_value': -15.90771198272705, 'ave_value': -19.438507731003803, 'soft_opc': nan} step=15136




2022-04-20 16:29.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.48 [info     ] FQE_20220420162831: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016223206076511118, 'time_algorithm_update': 0.004068317108376082, 'loss': 0.6436058401054334, 'time_step': 0.00430561290230862, 'init_value': -15.49227523803711, 'ave_value': -19.143199375365768, 'soft_opc': nan} step=15480




2022-04-20 16:29.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.49 [info     ] FQE_20220420162831: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016545001850571742, 'time_algorithm_update': 0.004568069480186285, 'loss': 0.6450928734117296, 'time_step': 0.004808357981748359, 'init_value': -15.798296928405762, 'ave_value': -19.4651850536667, 'soft_opc': nan} step=15824




2022-04-20 16:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.51 [info     ] FQE_20220420162831: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016007797662601916, 'time_algorithm_update': 0.0033065704412238543, 'loss': 0.6526611350141033, 'time_step': 0.0035398415354795233, 'init_value': -15.940458297729492, 'ave_value': -19.678264954303568, 'soft_opc': nan} step=16168




2022-04-20 16:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.52 [info     ] FQE_20220420162831: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016100115554277286, 'time_algorithm_update': 0.003406075544135515, 'loss': 0.6426914579581556, 'time_step': 0.003640778536020323, 'init_value': -15.875758171081543, 'ave_value': -19.681386502176053, 'soft_opc': nan} step=16512




2022-04-20 16:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.53 [info     ] FQE_20220420162831: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016342831212420796, 'time_algorithm_update': 0.0034980351148649704, 'loss': 0.6558032002960613, 'time_step': 0.0037358957667683445, 'init_value': -15.412943840026855, 'ave_value': -19.42944289177912, 'soft_opc': nan} step=16856




2022-04-20 16:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:29.55 [info     ] FQE_20220420162831: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016068372615548067, 'time_algorithm_update': 0.0034078435842380965, 'loss': 0.6494369441535064, 'time_step': 0.0036444379839786264, 'init_value': -15.456042289733887, 'ave_value': -19.45423047615316, 'soft_opc': nan} step=17200




2022-04-20 16:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420162831/model_17200.pt
most optimal hyper params for cql at this point:  [0.008762173788455153, 0.003332115014684912, 7.002783808884287e-05, 1]
search iteration:  7
using hyper params:  [0.004940147045651946, 0.0006002394062648449, 7.204121497423649e-05, 3]
2022-04-20 16:29.55 [debug    ] RoundIterator is selected.
2022-04-20 16:29.55 [info     ] Directory is created at d3rlpy_logs/CQL_20220420162955
2022-04-20 16:29.55 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:29.55 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:29.55 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420162955/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_le

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.04 [info     ] CQL_20220420162955: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00033186262811136526, 'time_algorithm_update': 0.02416793365924679, 'temp_loss': 4.764942509389063, 'temp': 0.9877857905730867, 'alpha_loss': -17.900457839519657, 'alpha': 1.0176334740125645, 'critic_loss': 48.763805367096126, 'actor_loss': 0.7101711118212569, 'time_step': 0.024599053706342015, 'td_error': 3.0121825068646304, 'init_value': -2.4391770362854004, 'ave_value': -1.7822577852436954} step=342
2022-04-20 16:30.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.13 [info     ] CQL_20220420162955: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003408319071719521, 'time_algorithm_update': 0.02417173859668754, 'temp_loss': 4.428460319139804, 'temp': 0.9647033929127699, 'alpha_loss': -16.2416769161559, 'alpha': 1.0525506323541116, 'critic_loss': 38.10639130441766, 'actor_loss': 1.9215837243007639, 'time_step': 0.02461234658782245, 'td_error': 2.288563029864099, 'init_value': -5.254973888397217, 'ave_value': -3.371998290618932} step=684
2022-04-20 16:30.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.21 [info     ] CQL_20220420162955: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003393121630127667, 'time_algorithm_update': 0.02421478569856164, 'temp_loss': 4.202009949767799, 'temp': 0.9424894344039828, 'alpha_loss': -13.700388679727476, 'alpha': 1.0847534254977578, 'critic_loss': 35.04470543554652, 'actor_loss': 4.101636040280437, 'time_step': 0.024655204070241826, 'td_error': 2.7060671570758856, 'init_value': -8.95988655090332, 'ave_value': -5.66364015796398} step=1026
2022-04-20 16:30.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.30 [info     ] CQL_20220420162955: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00034072942901075933, 'time_algorithm_update': 0.024258382836280512, 'temp_loss': 3.991767674161677, 'temp': 0.9210787424218585, 'alpha_loss': -11.808396807888098, 'alpha': 1.1156782692635965, 'critic_loss': 31.005052075748555, 'actor_loss': 7.120314379184567, 'time_step': 0.02469773668991892, 'td_error': 3.510216715379616, 'init_value': -13.786809921264648, 'ave_value': -8.154868444392608} step=1368
2022-04-20 16:30.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.39 [info     ] CQL_20220420162955: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00034767981858281366, 'time_algorithm_update': 0.023803020778455232, 'temp_loss': 3.738598030213027, 'temp': 0.9006014854935874, 'alpha_loss': -10.234094653213234, 'alpha': 1.145651404272046, 'critic_loss': 29.75500552417242, 'actor_loss': 10.309862262324282, 'time_step': 0.02425315714719003, 'td_error': 4.965522544095718, 'init_value': -19.212017059326172, 'ave_value': -10.854795135367024} step=1710
2022-04-20 16:30.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.48 [info     ] CQL_20220420162955: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00034143143927144724, 'time_algorithm_update': 0.024389528391654033, 'temp_loss': 3.4763099582571733, 'temp': 0.8811374609930474, 'alpha_loss': -9.138100225325914, 'alpha': 1.175361174589012, 'critic_loss': 31.07845435783877, 'actor_loss': 13.385401352107177, 'time_step': 0.024830923442952117, 'td_error': 6.132326207268396, 'init_value': -23.891403198242188, 'ave_value': -13.202615883268818} step=2052
2022-04-20 16:30.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:30.57 [info     ] CQL_20220420162955: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.000339491325512267, 'time_algorithm_update': 0.024347793289095337, 'temp_loss': 3.238200691011217, 'temp': 0.8625235296132272, 'alpha_loss': -8.26094637140196, 'alpha': 1.2053196287294576, 'critic_loss': 33.891774913720916, 'actor_loss': 16.298171966396577, 'time_step': 0.024789143724051135, 'td_error': 7.556500281197341, 'init_value': -28.735370635986328, 'ave_value': -15.934499567234301} step=2394
2022-04-20 16:30.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.06 [info     ] CQL_20220420162955: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00033667491890533623, 'time_algorithm_update': 0.024406167498806065, 'temp_loss': 3.0152770908255326, 'temp': 0.8446894066724163, 'alpha_loss': -7.571505916048909, 'alpha': 1.235975370421047, 'critic_loss': 37.07380240144786, 'actor_loss': 19.089724950623093, 'time_step': 0.024843162263345996, 'td_error': 9.215774562404858, 'init_value': -32.97273254394531, 'ave_value': -17.93299641867143} step=2736
2022-04-20 16:31.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.14 [info     ] CQL_20220420162955: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003394571661252027, 'time_algorithm_update': 0.024262263760929218, 'temp_loss': 2.81176569587306, 'temp': 0.8274947523373609, 'alpha_loss': -6.9799219111950075, 'alpha': 1.2673724467991387, 'critic_loss': 40.81951419651857, 'actor_loss': 21.68701300425836, 'time_step': 0.024702482753329806, 'td_error': 11.209495396349434, 'init_value': -37.33601760864258, 'ave_value': -20.414509665456574} step=3078
2022-04-20 16:31.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.23 [info     ] CQL_20220420162955: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003419487099898489, 'time_algorithm_update': 0.024427513630069488, 'temp_loss': 2.639901534158584, 'temp': 0.8107750128235733, 'alpha_loss': -6.463702530888786, 'alpha': 1.2998142033292537, 'critic_loss': 44.43898357145967, 'actor_loss': 24.15051081584908, 'time_step': 0.02487152292017351, 'td_error': 13.020135841669795, 'init_value': -40.64592742919922, 'ave_value': -21.994560647212843} step=3420
2022-04-20 16:31.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.32 [info     ] CQL_20220420162955: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00033878025255705183, 'time_algorithm_update': 0.02425327147656714, 'temp_loss': 2.463680655635588, 'temp': 0.7945850739005016, 'alpha_loss': -5.901024524231403, 'alpha': 1.332968283117863, 'critic_loss': 48.274995736908494, 'actor_loss': 26.436194090815317, 'time_step': 0.024690181191204585, 'td_error': 15.066568753621395, 'init_value': -44.890342712402344, 'ave_value': -24.43501396960235} step=3762
2022-04-20 16:31.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.41 [info     ] CQL_20220420162955: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00033982385668838235, 'time_algorithm_update': 0.02437670049611588, 'temp_loss': 2.311202561994742, 'temp': 0.7788053562766627, 'alpha_loss': -5.383948130914343, 'alpha': 1.3663174750512106, 'critic_loss': 52.26150735777024, 'actor_loss': 28.581138861806767, 'time_step': 0.024816227935211002, 'td_error': 17.41095176870747, 'init_value': -48.093162536621094, 'ave_value': -26.149774666938697} step=4104
2022-04-20 16:31.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.50 [info     ] CQL_20220420162955: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034278596353809736, 'time_algorithm_update': 0.024411364605552273, 'temp_loss': 2.1744274936224284, 'temp': 0.7633984341607456, 'alpha_loss': -4.905417378185786, 'alpha': 1.400581109244921, 'critic_loss': 56.51363174260011, 'actor_loss': 30.539892336081344, 'time_step': 0.024854298920659292, 'td_error': 19.457517798653114, 'init_value': -51.24834060668945, 'ave_value': -27.693326347180985} step=4446
2022-04-20 16:31.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:31.59 [info     ] CQL_20220420162955: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00034233004029034177, 'time_algorithm_update': 0.024221651038231207, 'temp_loss': 2.030543988211113, 'temp': 0.7483345911516781, 'alpha_loss': -4.421836499233692, 'alpha': 1.435095132791508, 'critic_loss': 60.923890755190484, 'actor_loss': 32.3850655806692, 'time_step': 0.024667101296765064, 'td_error': 21.44273599898936, 'init_value': -53.91522216796875, 'ave_value': -28.802713185093022} step=4788
2022-04-20 16:31.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.08 [info     ] CQL_20220420162955: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00033867359161376953, 'time_algorithm_update': 0.024347163780390868, 'temp_loss': 1.9089026743905586, 'temp': 0.7336791297148543, 'alpha_loss': -3.9344795316632033, 'alpha': 1.469486361358598, 'critic_loss': 65.3120552419919, 'actor_loss': 34.120097260726126, 'time_step': 0.02478361338899847, 'td_error': 23.905977540145997, 'init_value': -56.848167419433594, 'ave_value': -30.461243011752764} step=5130
2022-04-20 16:32.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.17 [info     ] CQL_20220420162955: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00033599591394614057, 'time_algorithm_update': 0.024485832766482706, 'temp_loss': 1.7817659486106963, 'temp': 0.7193349622494993, 'alpha_loss': -3.4315679028020267, 'alpha': 1.5032745878598843, 'critic_loss': 69.85792782432155, 'actor_loss': 35.728139129995604, 'time_step': 0.0249202913708157, 'td_error': 26.024972454728633, 'init_value': -59.577003479003906, 'ave_value': -31.89730268938987} step=5472
2022-04-20 16:32.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.26 [info     ] CQL_20220420162955: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003408486383003101, 'time_algorithm_update': 0.02559889338867009, 'temp_loss': 1.6734868720260978, 'temp': 0.7053326357874954, 'alpha_loss': -2.9292744367290835, 'alpha': 1.536390153288144, 'critic_loss': 74.47423826741894, 'actor_loss': 37.220341213962485, 'time_step': 0.026038632755391082, 'td_error': 28.43581958084771, 'init_value': -62.08709716796875, 'ave_value': -33.2586550538752} step=5814
2022-04-20 16:32.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.35 [info     ] CQL_20220420162955: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00034431616465250653, 'time_algorithm_update': 0.02592478160969695, 'temp_loss': 1.55426711994305, 'temp': 0.6916777256288027, 'alpha_loss': -2.4082364244767795, 'alpha': 1.5673220680471052, 'critic_loss': 79.28650463394254, 'actor_loss': 38.58617752476742, 'time_step': 0.02636875255763182, 'td_error': 30.267595783951545, 'init_value': -64.43327331542969, 'ave_value': -34.39514580826367} step=6156
2022-04-20 16:32.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.45 [info     ] CQL_20220420162955: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003381451668097959, 'time_algorithm_update': 0.02561529965428581, 'temp_loss': 1.4516066692725957, 'temp': 0.6783754982446369, 'alpha_loss': -1.933743881813905, 'alpha': 1.59642606834222, 'critic_loss': 83.91462727596885, 'actor_loss': 39.87939453125, 'time_step': 0.026054656296445614, 'td_error': 32.268429181610635, 'init_value': -66.64765930175781, 'ave_value': -35.42488993142102} step=6498
2022-04-20 16:32.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:32.54 [info     ] CQL_20220420162955: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034106474870826767, 'time_algorithm_update': 0.025822155656870346, 'temp_loss': 1.3574541915229887, 'temp': 0.6653571590694071, 'alpha_loss': -1.4605870149561275, 'alpha': 1.6225189671181797, 'critic_loss': 88.65141046535201, 'actor_loss': 41.055437026665224, 'time_step': 0.026263747996056987, 'td_error': 33.97499558691589, 'init_value': -68.32598876953125, 'ave_value': -35.922188078749016} step=6840
2022-04-20 16:32.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.04 [info     ] CQL_20220420162955: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00034349285371122306, 'time_algorithm_update': 0.025679577163785522, 'temp_loss': 1.2626259121281362, 'temp': 0.6526515869020718, 'alpha_loss': -0.9082267774563086, 'alpha': 1.642845112329338, 'critic_loss': 93.08668660838701, 'actor_loss': 42.21028209708587, 'time_step': 0.02612505042762087, 'td_error': 36.13243618842999, 'init_value': -71.11687469482422, 'ave_value': -37.29636264909092} step=7182
2022-04-20 16:33.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.13 [info     ] CQL_20220420162955: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003489520814683702, 'time_algorithm_update': 0.025966708423101413, 'temp_loss': 1.174535777659444, 'temp': 0.6402671481782233, 'alpha_loss': -0.4461949477196884, 'alpha': 1.6565781329807483, 'critic_loss': 97.8415683500948, 'actor_loss': 43.20620317068713, 'time_step': 0.02641527067150986, 'td_error': 37.8921955419526, 'init_value': -72.92610931396484, 'ave_value': -37.88122498743706} step=7524
2022-04-20 16:33.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.22 [info     ] CQL_20220420162955: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00034341477511221904, 'time_algorithm_update': 0.02599431896767421, 'temp_loss': 1.0893065603504404, 'temp': 0.6281412571494343, 'alpha_loss': 0.03965610858292608, 'alpha': 1.6619633708083839, 'critic_loss': 102.17143325359501, 'actor_loss': 44.2232975987663, 'time_step': 0.02643966814230757, 'td_error': 39.80643324275191, 'init_value': -74.2061996459961, 'ave_value': -38.21066520525931} step=7866
2022-04-20 16:33.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.32 [info     ] CQL_20220420162955: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003415764423838833, 'time_algorithm_update': 0.02563188856805277, 'temp_loss': 1.0201984161173392, 'temp': 0.6163538490471087, 'alpha_loss': 0.4835683527451597, 'alpha': 1.6534620453739723, 'critic_loss': 106.79996715791044, 'actor_loss': 45.11732393677472, 'time_step': 0.026073875483016522, 'td_error': 41.24036709804986, 'init_value': -75.720947265625, 'ave_value': -39.161024759187626} step=8208
2022-04-20 16:33.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.41 [info     ] CQL_20220420162955: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00034035785853514197, 'time_algorithm_update': 0.025955386329115482, 'temp_loss': 0.9568284695957139, 'temp': 0.6046917856785289, 'alpha_loss': 0.9237175316683273, 'alpha': 1.6344656435369749, 'critic_loss': 111.10287502355743, 'actor_loss': 46.02600640860217, 'time_step': 0.026396379833332977, 'td_error': 43.11938352600161, 'init_value': -77.73249816894531, 'ave_value': -39.967649448202536} step=8550
2022-04-20 16:33.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:33.51 [info     ] CQL_20220420162955: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003398538332933571, 'time_algorithm_update': 0.025695636955618162, 'temp_loss': 0.871586848594989, 'temp': 0.5933971286517138, 'alpha_loss': 1.2747768136449376, 'alpha': 1.6012398558750487, 'critic_loss': 115.57749711421498, 'actor_loss': 46.77726323981034, 'time_step': 0.02613715818750928, 'td_error': 44.310722048709074, 'init_value': -78.6163558959961, 'ave_value': -40.27241829086934} step=8892
2022-04-20 16:33.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.00 [info     ] CQL_20220420162955: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003395854381092808, 'time_algorithm_update': 0.026073438382288167, 'temp_loss': 0.8218743738375212, 'temp': 0.5823119498832882, 'alpha_loss': 1.6222364362628794, 'alpha': 1.5615151475744637, 'critic_loss': 120.14540229485048, 'actor_loss': 47.487632494920874, 'time_step': 0.026513918101439, 'td_error': 45.967203004420014, 'init_value': -80.94498443603516, 'ave_value': -41.31752436140368} step=9234
2022-04-20 16:34.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.10 [info     ] CQL_20220420162955: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034173747949432906, 'time_algorithm_update': 0.02580441787228947, 'temp_loss': 0.7615020574882017, 'temp': 0.5715121273060291, 'alpha_loss': 1.881627630412426, 'alpha': 1.5125596770766185, 'critic_loss': 124.33295088204724, 'actor_loss': 48.19640378227011, 'time_step': 0.02624799354731688, 'td_error': 47.43300192539952, 'init_value': -82.5036849975586, 'ave_value': -42.169281384900614} step=9576
2022-04-20 16:34.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.19 [info     ] CQL_20220420162955: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000342403238976908, 'time_algorithm_update': 0.025871658185769243, 'temp_loss': 0.7111603703938032, 'temp': 0.5607955752060427, 'alpha_loss': 2.1233097407694537, 'alpha': 1.4622257885180021, 'critic_loss': 127.87094238905878, 'actor_loss': 48.83368272390979, 'time_step': 0.026315516895718045, 'td_error': 48.29165995369315, 'init_value': -83.73052215576172, 'ave_value': -42.434537476025994} step=9918
2022-04-20 16:34.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.28 [info     ] CQL_20220420162955: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00034194801285950064, 'time_algorithm_update': 0.025899645180730093, 'temp_loss': 0.6652381193568135, 'temp': 0.5503502432365863, 'alpha_loss': 2.2654196375304267, 'alpha': 1.4128951224667288, 'critic_loss': 131.77528550889758, 'actor_loss': 49.4596538264849, 'time_step': 0.026345625258328623, 'td_error': 49.66384650327563, 'init_value': -85.01213073730469, 'ave_value': -42.961414080561546} step=10260
2022-04-20 16:34.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.38 [info     ] CQL_20220420162955: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034100061271622866, 'time_algorithm_update': 0.025662305759407623, 'temp_loss': 0.6204680006580743, 'temp': 0.5401086029950638, 'alpha_loss': 2.40219742121315, 'alpha': 1.3641139049279063, 'critic_loss': 135.4531305770428, 'actor_loss': 50.02612975048043, 'time_step': 0.02610337803935447, 'td_error': 51.412228668481816, 'init_value': -86.6552963256836, 'ave_value': -43.57427503338134} step=10602
2022-04-20 16:34.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.47 [info     ] CQL_20220420162955: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00034583102889925414, 'time_algorithm_update': 0.025962927188092504, 'temp_loss': 0.570179961055343, 'temp': 0.5300845894200062, 'alpha_loss': 2.4977944451357015, 'alpha': 1.31743807227988, 'critic_loss': 139.25498389082347, 'actor_loss': 50.55828208254095, 'time_step': 0.026410298040735792, 'td_error': 52.54780661583541, 'init_value': -87.96559143066406, 'ave_value': -44.396459794056575} step=10944
2022-04-20 16:34.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:34.57 [info     ] CQL_20220420162955: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.000349082444843493, 'time_algorithm_update': 0.025520763899150648, 'temp_loss': 0.5436418334468763, 'temp': 0.5201830668756139, 'alpha_loss': 2.571918653433173, 'alpha': 1.2730473977083352, 'critic_loss': 142.56728001644737, 'actor_loss': 51.101412321391855, 'time_step': 0.025968221893087465, 'td_error': 53.06191314925826, 'init_value': -89.1057357788086, 'ave_value': -44.65820325714511} step=11286
2022-04-20 16:34.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.06 [info     ] CQL_20220420162955: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00034341686650326376, 'time_algorithm_update': 0.02597635879851224, 'temp_loss': 0.49415740266180874, 'temp': 0.5105567842896221, 'alpha_loss': 2.688146150056872, 'alpha': 1.2295558891798322, 'critic_loss': 146.81616770872597, 'actor_loss': 51.58682213052671, 'time_step': 0.02641993517067, 'td_error': 54.06653083235164, 'init_value': -90.01036071777344, 'ave_value': -44.90948136655329} step=11628
2022-04-20 16:35.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.15 [info     ] CQL_20220420162955: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00034031045367146094, 'time_algorithm_update': 0.025810838442796854, 'temp_loss': 0.45239479081672535, 'temp': 0.5013879516668487, 'alpha_loss': 2.7354782175773766, 'alpha': 1.1874470937321757, 'critic_loss': 150.33475550155194, 'actor_loss': 52.066940831859206, 'time_step': 0.026251753868415342, 'td_error': 55.02739224536281, 'init_value': -91.07514953613281, 'ave_value': -45.814513328551975} step=11970
2022-04-20 16:35.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.24 [info     ] CQL_20220420162955: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00034417395006146345, 'time_algorithm_update': 0.024466940534045124, 'temp_loss': 0.43281119495455983, 'temp': 0.4921708679512927, 'alpha_loss': 2.757690317863435, 'alpha': 1.1478697577415153, 'critic_loss': 153.78620736640795, 'actor_loss': 52.52473623710766, 'time_step': 0.024912242303814804, 'td_error': 55.96849712019786, 'init_value': -92.4081039428711, 'ave_value': -46.16080944346952} step=12312
2022-04-20 16:35.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.33 [info     ] CQL_20220420162955: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003445483090584738, 'time_algorithm_update': 0.02429297235276964, 'temp_loss': 0.3804946401270858, 'temp': 0.4832482936612347, 'alpha_loss': 2.8254040364633526, 'alpha': 1.1100581981982405, 'critic_loss': 156.92296575802808, 'actor_loss': 52.91957652220252, 'time_step': 0.024738856923510456, 'td_error': 56.86268841426592, 'init_value': -92.4163818359375, 'ave_value': -45.974624468343215} step=12654
2022-04-20 16:35.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.42 [info     ] CQL_20220420162955: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033842611034014074, 'time_algorithm_update': 0.024264065842879445, 'temp_loss': 0.3608740442677548, 'temp': 0.4747484076267097, 'alpha_loss': 2.8292300679356033, 'alpha': 1.0728212113966022, 'critic_loss': 160.04413647121854, 'actor_loss': 53.28272354393675, 'time_step': 0.024700967889083058, 'td_error': 57.85063682989684, 'init_value': -93.25569152832031, 'ave_value': -46.20617644668834} step=12996
2022-04-20 16:35.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:35.51 [info     ] CQL_20220420162955: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00034325304087142497, 'time_algorithm_update': 0.02420074688760858, 'temp_loss': 0.3419380992520274, 'temp': 0.4661127014292611, 'alpha_loss': 2.8147643478641733, 'alpha': 1.0385099433318914, 'critic_loss': 163.31797063280965, 'actor_loss': 53.653088195979244, 'time_step': 0.024642503749557405, 'td_error': 57.754570379735384, 'init_value': -94.63619232177734, 'ave_value': -46.710090830483146} step=13338
2022-04-20 16:35.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.00 [info     ] CQL_20220420162955: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003430975808037652, 'time_algorithm_update': 0.024369966914082133, 'temp_loss': 0.30733130853732077, 'temp': 0.4578434777015831, 'alpha_loss': 2.819563768934785, 'alpha': 1.0042732656002045, 'critic_loss': 166.32820945873596, 'actor_loss': 53.981493085448506, 'time_step': 0.024812721369559306, 'td_error': 58.934646542762756, 'init_value': -95.19706726074219, 'ave_value': -46.96659324312264} step=13680
2022-04-20 16:36.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.09 [info     ] CQL_20220420162955: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00034418301275599073, 'time_algorithm_update': 0.024012497991149188, 'temp_loss': 0.27576905033654636, 'temp': 0.44987541463291436, 'alpha_loss': 2.8015504399238274, 'alpha': 0.9720551962392372, 'critic_loss': 169.7093740095172, 'actor_loss': 54.2997853574697, 'time_step': 0.02445545182590596, 'td_error': 60.19785572502588, 'init_value': -96.2822265625, 'ave_value': -47.740599977159285} step=14022
2022-04-20 16:36.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.17 [info     ] CQL_20220420162955: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033614021992822837, 'time_algorithm_update': 0.024243898559034915, 'temp_loss': 0.2590187295824725, 'temp': 0.4421576473273729, 'alpha_loss': 2.782781023118231, 'alpha': 0.9410913579645213, 'critic_loss': 172.33638656348512, 'actor_loss': 54.571674358077914, 'time_step': 0.024680244295220626, 'td_error': 60.6731405317742, 'init_value': -96.46886444091797, 'ave_value': -47.49959771477961} step=14364
2022-04-20 16:36.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.26 [info     ] CQL_20220420162955: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00034002532735902664, 'time_algorithm_update': 0.024214624661451193, 'temp_loss': 0.23121248190536312, 'temp': 0.43459641532591214, 'alpha_loss': 2.7490328137289013, 'alpha': 0.9109433498996043, 'critic_loss': 174.7801282559222, 'actor_loss': 54.8478956724468, 'time_step': 0.024656168201513458, 'td_error': 60.71391555345285, 'init_value': -97.50508117675781, 'ave_value': -48.04952139499891} step=14706
2022-04-20 16:36.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.35 [info     ] CQL_20220420162955: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003401710276018109, 'time_algorithm_update': 0.02435744296737582, 'temp_loss': 0.22044520971653928, 'temp': 0.427335765800978, 'alpha_loss': 2.6647470110689686, 'alpha': 0.8822181568159695, 'critic_loss': 177.75187424330684, 'actor_loss': 55.10650090446249, 'time_step': 0.02480122499298631, 'td_error': 61.604758889371034, 'init_value': -97.59892272949219, 'ave_value': -48.27193644707133} step=15048
2022-04-20 16:36.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.44 [info     ] CQL_20220420162955: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003424855003580015, 'time_algorithm_update': 0.02432517140929462, 'temp_loss': 0.1881771320068653, 'temp': 0.4201242725925836, 'alpha_loss': 2.607280315624343, 'alpha': 0.8551876581551736, 'critic_loss': 179.6449588530245, 'actor_loss': 55.202153211448625, 'time_step': 0.024767196666427523, 'td_error': 60.99862814057503, 'init_value': -97.0296859741211, 'ave_value': -48.473343412325725} step=15390
2022-04-20 16:36.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:36.53 [info     ] CQL_20220420162955: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003391385775560524, 'time_algorithm_update': 0.024313044129756458, 'temp_loss': 0.15580866429736914, 'temp': 0.4139297654231389, 'alpha_loss': 2.5714005514195093, 'alpha': 0.8287999861770206, 'critic_loss': 181.574692040159, 'actor_loss': 55.21746438009697, 'time_step': 0.02475088799906056, 'td_error': 60.9028683960756, 'init_value': -96.2280502319336, 'ave_value': -48.01690643138043} step=15732
2022-04-20 16:36.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.02 [info     ] CQL_20220420162955: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003412536710326435, 'time_algorithm_update': 0.02429028212675574, 'temp_loss': 0.14362420112831376, 'temp': 0.407918260024305, 'alpha_loss': 2.4733206030221013, 'alpha': 0.8038115288779052, 'critic_loss': 183.75692271628574, 'actor_loss': 55.2422052796124, 'time_step': 0.024730248757970263, 'td_error': 60.1591214117279, 'init_value': -96.26951599121094, 'ave_value': -48.395512812451734} step=16074
2022-04-20 16:37.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.11 [info     ] CQL_20220420162955: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003392926433630157, 'time_algorithm_update': 0.0242943038717348, 'temp_loss': 0.13193670915009464, 'temp': 0.40205721330572985, 'alpha_loss': 2.4246198704891038, 'alpha': 0.779330713707104, 'critic_loss': 185.4056790000514, 'actor_loss': 55.336856741654245, 'time_step': 0.024729878581755342, 'td_error': 60.179232198939836, 'init_value': -95.82600402832031, 'ave_value': -48.5774807671236} step=16416
2022-04-20 16:37.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.19 [info     ] CQL_20220420162955: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003378649204097993, 'time_algorithm_update': 0.024241802985208075, 'temp_loss': 0.12091906359201374, 'temp': 0.39620697968884516, 'alpha_loss': 2.3421654183613625, 'alpha': 0.7559744609378235, 'critic_loss': 186.16424957632321, 'actor_loss': 55.377435126499826, 'time_step': 0.024678005112542048, 'td_error': 60.360579644194594, 'init_value': -95.58708190917969, 'ave_value': -48.88289423998546} step=16758
2022-04-20 16:37.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:37.28 [info     ] CQL_20220420162955: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00034101594958389016, 'time_algorithm_update': 0.024260026669641683, 'temp_loss': 0.100062594815751, 'temp': 0.39065822331528915, 'alpha_loss': 2.2824836676145157, 'alpha': 0.7336226501311475, 'critic_loss': 188.09928273875812, 'actor_loss': 55.41898266753258, 'time_step': 0.0247030230293497, 'td_error': 60.705797801538935, 'init_value': -96.40584564208984, 'ave_value': -49.31171943904446} step=17100
2022-04-20 16:37.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420162955/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:37.29 [info     ] FQE_20220420163728: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001590754612382636, 'time_algorithm_update': 0.0034472669463559806, 'loss': 0.007138290027358446, 'time_step': 0.003676346985690565, 'init_value': -0.1263846755027771, 'ave_value': -0.08904373798653617, 'soft_opc': nan} step=166




2022-04-20 16:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.30 [info     ] FQE_20220420163728: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001581390219998647, 'time_algorithm_update': 0.0036172349768948844, 'loss': 0.006108689583355494, 'time_step': 0.003843634961599327, 'init_value': -0.2684849798679352, 'ave_value': -0.17515268435550702, 'soft_opc': nan} step=332




2022-04-20 16:37.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.31 [info     ] FQE_20220420163728: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015652179718017578, 'time_algorithm_update': 0.0034373108162937395, 'loss': 0.005769553646187197, 'time_step': 0.0036626005747232094, 'init_value': -0.34472033381462097, 'ave_value': -0.1858409710534689, 'soft_opc': nan} step=498




2022-04-20 16:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.31 [info     ] FQE_20220420163728: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016295909881591797, 'time_algorithm_update': 0.0035206381096897356, 'loss': 0.005665458658855992, 'time_step': 0.0037540125559611492, 'init_value': -0.4492731988430023, 'ave_value': -0.2320915833757435, 'soft_opc': nan} step=664




2022-04-20 16:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.32 [info     ] FQE_20220420163728: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015684208237981223, 'time_algorithm_update': 0.003543106906385307, 'loss': 0.0052721768208072485, 'time_step': 0.0037733833473849007, 'init_value': -0.5361701250076294, 'ave_value': -0.27555739792684714, 'soft_opc': nan} step=830




2022-04-20 16:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.33 [info     ] FQE_20220420163728: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001646395189216338, 'time_algorithm_update': 0.0034312569951436608, 'loss': 0.004864004512839244, 'time_step': 0.003666261592543269, 'init_value': -0.5972883701324463, 'ave_value': -0.3088923266392436, 'soft_opc': nan} step=996




2022-04-20 16:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.33 [info     ] FQE_20220420163728: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015853255628103233, 'time_algorithm_update': 0.003535899771265237, 'loss': 0.004670464021366942, 'time_step': 0.0037648864539272815, 'init_value': -0.6546542644500732, 'ave_value': -0.3384937865441447, 'soft_opc': nan} step=1162




2022-04-20 16:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.34 [info     ] FQE_20220420163728: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016796158020754894, 'time_algorithm_update': 0.003450232816029744, 'loss': 0.0043492937426879465, 'time_step': 0.00369391814771905, 'init_value': -0.7501293420791626, 'ave_value': -0.40496905521397386, 'soft_opc': nan} step=1328




2022-04-20 16:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.35 [info     ] FQE_20220420163728: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016115946942065135, 'time_algorithm_update': 0.00357954760631883, 'loss': 0.0042057866672424215, 'time_step': 0.003816979477204472, 'init_value': -0.8035528063774109, 'ave_value': -0.43803987774477754, 'soft_opc': nan} step=1494




2022-04-20 16:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.35 [info     ] FQE_20220420163728: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016221799046160226, 'time_algorithm_update': 0.0034590514309435004, 'loss': 0.004106286400508315, 'time_step': 0.0036943418433867305, 'init_value': -0.8725845813751221, 'ave_value': -0.4667344791143461, 'soft_opc': nan} step=1660




2022-04-20 16:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.36 [info     ] FQE_20220420163728: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015956378844847162, 'time_algorithm_update': 0.0035426631031266176, 'loss': 0.003956774365386914, 'time_step': 0.00377547022808029, 'init_value': -0.9158902168273926, 'ave_value': -0.4849512599047844, 'soft_opc': nan} step=1826




2022-04-20 16:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.37 [info     ] FQE_20220420163728: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016231134713414204, 'time_algorithm_update': 0.003423832985291998, 'loss': 0.0036852464215908513, 'time_step': 0.003659585872328425, 'init_value': -0.9351325035095215, 'ave_value': -0.4969234413643544, 'soft_opc': nan} step=1992




2022-04-20 16:37.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.37 [info     ] FQE_20220420163728: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016053469784288522, 'time_algorithm_update': 0.0034995337566697455, 'loss': 0.0037350656341267638, 'time_step': 0.0037305283259196453, 'init_value': -1.0344188213348389, 'ave_value': -0.5600893473697339, 'soft_opc': nan} step=2158




2022-04-20 16:37.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.38 [info     ] FQE_20220420163728: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015879969999014614, 'time_algorithm_update': 0.003356524260647326, 'loss': 0.0037462795237415887, 'time_step': 0.0035915489656379424, 'init_value': -1.120689034461975, 'ave_value': -0.6075341128114913, 'soft_opc': nan} step=2324




2022-04-20 16:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.39 [info     ] FQE_20220420163728: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001588083175291498, 'time_algorithm_update': 0.003616370350481516, 'loss': 0.003738242684700807, 'time_step': 0.0038478144680161074, 'init_value': -1.1606855392456055, 'ave_value': -0.625833161317886, 'soft_opc': nan} step=2490




2022-04-20 16:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.39 [info     ] FQE_20220420163728: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015741658498005695, 'time_algorithm_update': 0.003380350319735975, 'loss': 0.003934516903876989, 'time_step': 0.0036082095410450397, 'init_value': -1.207728624343872, 'ave_value': -0.6423258090458878, 'soft_opc': nan} step=2656




2022-04-20 16:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.40 [info     ] FQE_20220420163728: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001608118953475033, 'time_algorithm_update': 0.0035524540636912884, 'loss': 0.0039186568963004795, 'time_step': 0.0037823685680527285, 'init_value': -1.3338348865509033, 'ave_value': -0.7146738278536915, 'soft_opc': nan} step=2822




2022-04-20 16:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.41 [info     ] FQE_20220420163728: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001604470861963479, 'time_algorithm_update': 0.0027408786566860705, 'loss': 0.0041099942320149884, 'time_step': 0.002974202833979963, 'init_value': -1.4123661518096924, 'ave_value': -0.7732482898275594, 'soft_opc': nan} step=2988




2022-04-20 16:37.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.42 [info     ] FQE_20220420163728: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015928371843085232, 'time_algorithm_update': 0.004522207271621888, 'loss': 0.00447026738779148, 'time_step': 0.004752630210784544, 'init_value': -1.4642632007598877, 'ave_value': -0.7843035446949773, 'soft_opc': nan} step=3154




2022-04-20 16:37.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.42 [info     ] FQE_20220420163728: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016579857791762753, 'time_algorithm_update': 0.004505114383008106, 'loss': 0.004800122302777347, 'time_step': 0.004748015518648079, 'init_value': -1.5935574769973755, 'ave_value': -0.864179684176437, 'soft_opc': nan} step=3320




2022-04-20 16:37.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.43 [info     ] FQE_20220420163728: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016250667801822526, 'time_algorithm_update': 0.004471359482730727, 'loss': 0.005205184328566159, 'time_step': 0.0047096272548997255, 'init_value': -1.6701319217681885, 'ave_value': -0.9250074641812627, 'soft_opc': nan} step=3486




2022-04-20 16:37.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.44 [info     ] FQE_20220420163728: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016134331025272967, 'time_algorithm_update': 0.0038286289536809347, 'loss': 0.005862493669692741, 'time_step': 0.004063343427267419, 'init_value': -1.7831820249557495, 'ave_value': -1.0105273103240777, 'soft_opc': nan} step=3652




2022-04-20 16:37.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.45 [info     ] FQE_20220420163728: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.000159658581377512, 'time_algorithm_update': 0.004507017422871417, 'loss': 0.006125213337097183, 'time_step': 0.004740192229489246, 'init_value': -1.9142591953277588, 'ave_value': -1.0959038807028854, 'soft_opc': nan} step=3818




2022-04-20 16:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.46 [info     ] FQE_20220420163728: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016610450055225785, 'time_algorithm_update': 0.004518261875014707, 'loss': 0.006473846333830741, 'time_step': 0.004757223359073501, 'init_value': -1.9409440755844116, 'ave_value': -1.118486938279055, 'soft_opc': nan} step=3984




2022-04-20 16:37.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.46 [info     ] FQE_20220420163728: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015873937721712044, 'time_algorithm_update': 0.0039396903601037455, 'loss': 0.007073017762951463, 'time_step': 0.004167848322764936, 'init_value': -2.0520143508911133, 'ave_value': -1.1971709977139022, 'soft_opc': nan} step=4150




2022-04-20 16:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.47 [info     ] FQE_20220420163728: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016203414962952397, 'time_algorithm_update': 0.004526909575404891, 'loss': 0.0075029618126418755, 'time_step': 0.004760862833046052, 'init_value': -2.1920080184936523, 'ave_value': -1.3056027074554213, 'soft_opc': nan} step=4316




2022-04-20 16:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.48 [info     ] FQE_20220420163728: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001606036381549146, 'time_algorithm_update': 0.004501347082207002, 'loss': 0.008099350215676409, 'time_step': 0.004735339118773679, 'init_value': -2.1881165504455566, 'ave_value': -1.2833403535959151, 'soft_opc': nan} step=4482




2022-04-20 16:37.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.49 [info     ] FQE_20220420163728: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001637045159397355, 'time_algorithm_update': 0.004522389676197466, 'loss': 0.00821784163365163, 'time_step': 0.004757682961153697, 'init_value': -2.302265167236328, 'ave_value': -1.3676516937545022, 'soft_opc': nan} step=4648




2022-04-20 16:37.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.50 [info     ] FQE_20220420163728: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016215623143207595, 'time_algorithm_update': 0.0037815556468733824, 'loss': 0.0092546407284821, 'time_step': 0.004015549119696559, 'init_value': -2.42207670211792, 'ave_value': -1.4727817759614203, 'soft_opc': nan} step=4814




2022-04-20 16:37.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.51 [info     ] FQE_20220420163728: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016185030879744565, 'time_algorithm_update': 0.0045367177710475695, 'loss': 0.009318748010036322, 'time_step': 0.004770936736141343, 'init_value': -2.472588539123535, 'ave_value': -1.5104224149179686, 'soft_opc': nan} step=4980




2022-04-20 16:37.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.52 [info     ] FQE_20220420163728: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016383521528129117, 'time_algorithm_update': 0.004516720771789551, 'loss': 0.009241474740465272, 'time_step': 0.004755168076021126, 'init_value': -2.585334300994873, 'ave_value': -1.5820212373342197, 'soft_opc': nan} step=5146




2022-04-20 16:37.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.52 [info     ] FQE_20220420163728: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001633267804800746, 'time_algorithm_update': 0.003913313509470009, 'loss': 0.010571189276615989, 'time_step': 0.004145831946867058, 'init_value': -2.6286630630493164, 'ave_value': -1.6075664094951305, 'soft_opc': nan} step=5312




2022-04-20 16:37.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.53 [info     ] FQE_20220420163728: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001656477709850633, 'time_algorithm_update': 0.004510529070015413, 'loss': 0.01089070012851854, 'time_step': 0.004748755190745893, 'init_value': -2.669168472290039, 'ave_value': -1.6411560241888101, 'soft_opc': nan} step=5478




2022-04-20 16:37.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.54 [info     ] FQE_20220420163728: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00017074935407523648, 'time_algorithm_update': 0.004611589822424464, 'loss': 0.009799014868113739, 'time_step': 0.004857235644237104, 'init_value': -2.7647435665130615, 'ave_value': -1.6923227022930576, 'soft_opc': nan} step=5644




2022-04-20 16:37.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.55 [info     ] FQE_20220420163728: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016178711351141873, 'time_algorithm_update': 0.004521122897963926, 'loss': 0.012046381284157375, 'time_step': 0.0047550014702670545, 'init_value': -2.86968994140625, 'ave_value': -1.795495170095583, 'soft_opc': nan} step=5810




2022-04-20 16:37.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.56 [info     ] FQE_20220420163728: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017087861716029155, 'time_algorithm_update': 0.0038173672664596372, 'loss': 0.012849409058858383, 'time_step': 0.004063043249658792, 'init_value': -2.8976564407348633, 'ave_value': -1.8405034221398333, 'soft_opc': nan} step=5976




2022-04-20 16:37.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.57 [info     ] FQE_20220420163728: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016360397798469267, 'time_algorithm_update': 0.004586102014564606, 'loss': 0.013516628852461925, 'time_step': 0.004823235144098121, 'init_value': -3.010812997817993, 'ave_value': -1.9253778934445198, 'soft_opc': nan} step=6142




2022-04-20 16:37.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.57 [info     ] FQE_20220420163728: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001683335706412074, 'time_algorithm_update': 0.004559766815369387, 'loss': 0.014529568945472858, 'time_step': 0.004802382135965738, 'init_value': -3.057091236114502, 'ave_value': -1.9601580303464388, 'soft_opc': nan} step=6308




2022-04-20 16:37.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.58 [info     ] FQE_20220420163728: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016356663531567677, 'time_algorithm_update': 0.003858471491250647, 'loss': 0.016168623139481454, 'time_step': 0.004096318440264966, 'init_value': -3.232266902923584, 'ave_value': -2.0638643590213213, 'soft_opc': nan} step=6474




2022-04-20 16:37.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:37.59 [info     ] FQE_20220420163728: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016418853438044168, 'time_algorithm_update': 0.004488748240183635, 'loss': 0.01589986136679661, 'time_step': 0.004728847239390913, 'init_value': -3.2944841384887695, 'ave_value': -2.082166487837697, 'soft_opc': nan} step=6640




2022-04-20 16:37.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.00 [info     ] FQE_20220420163728: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001672391431877412, 'time_algorithm_update': 0.004509025309459272, 'loss': 0.01596826863270365, 'time_step': 0.004746230251817818, 'init_value': -3.2939553260803223, 'ave_value': -2.0626374919823296, 'soft_opc': nan} step=6806




2022-04-20 16:38.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.01 [info     ] FQE_20220420163728: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016349051372114434, 'time_algorithm_update': 0.00457728627216385, 'loss': 0.016675513511921942, 'time_step': 0.0048112237309835045, 'init_value': -3.380021572113037, 'ave_value': -2.0954795730812057, 'soft_opc': nan} step=6972




2022-04-20 16:38.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.01 [info     ] FQE_20220420163728: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016207723732454232, 'time_algorithm_update': 0.0039039878960115365, 'loss': 0.017212197641808404, 'time_step': 0.004140435931194259, 'init_value': -3.490550994873047, 'ave_value': -2.1944507285398807, 'soft_opc': nan} step=7138




2022-04-20 16:38.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.02 [info     ] FQE_20220420163728: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016974828329431005, 'time_algorithm_update': 0.004521357007773526, 'loss': 0.01811980251685705, 'time_step': 0.004764312721160521, 'init_value': -3.5727601051330566, 'ave_value': -2.2438515564208634, 'soft_opc': nan} step=7304




2022-04-20 16:38.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.03 [info     ] FQE_20220420163728: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001678481159440006, 'time_algorithm_update': 0.004541802119059735, 'loss': 0.018438338288751216, 'time_step': 0.00478529642863446, 'init_value': -3.5853898525238037, 'ave_value': -2.218843682912422, 'soft_opc': nan} step=7470




2022-04-20 16:38.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.04 [info     ] FQE_20220420163728: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001618704163884542, 'time_algorithm_update': 0.0038147848772715374, 'loss': 0.018758983110044015, 'time_step': 0.004047800259417798, 'init_value': -3.7313976287841797, 'ave_value': -2.3366541243284136, 'soft_opc': nan} step=7636




2022-04-20 16:38.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.05 [info     ] FQE_20220420163728: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016272355274981763, 'time_algorithm_update': 0.0045413410807230385, 'loss': 0.01950434376682009, 'time_step': 0.004778037588280368, 'init_value': -3.7417478561401367, 'ave_value': -2.3542117496423827, 'soft_opc': nan} step=7802




2022-04-20 16:38.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.06 [info     ] FQE_20220420163728: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016702226845614882, 'time_algorithm_update': 0.004527677972632718, 'loss': 0.020460558832196676, 'time_step': 0.004765150058700378, 'init_value': -3.860848903656006, 'ave_value': -2.48900461499789, 'soft_opc': nan} step=7968




2022-04-20 16:38.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.07 [info     ] FQE_20220420163728: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016554005174751742, 'time_algorithm_update': 0.004528947623379259, 'loss': 0.02085841028230175, 'time_step': 0.004769171576902091, 'init_value': -3.9045286178588867, 'ave_value': -2.5298509357119534, 'soft_opc': nan} step=8134




2022-04-20 16:38.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:38.07 [info     ] FQE_20220420163728: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016402767365237316, 'time_algorithm_update': 0.0038903678756162345, 'loss': 0.021441186749494176, 'time_step': 0.0041267455342304275, 'init_value': -3.840864658355713, 'ave_value': -2.490103228933908, 'soft_opc': nan} step=8300




2022-04-20 16:38.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163728/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:38.08 [info     ] Directory is created at d3rlpy_logs/FQE_20220420163808
2022-04-20 16:38.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:38.08 [debug    ] Building models...
2022-04-20 16:38.08 [debug    ] Models have been built.
2022-04-20 16:38.08 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420163808/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:38.09 [info     ] FQE_20220420163808: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016353851140931596, 'time_algorithm_update': 0.004514920850132787, 'loss': 0.025482378986685775, 'time_step': 0.004754125378852667, 'init_value': -1.178788423538208, 'ave_value': -1.1925676348308722, 'soft_opc': nan} step=344




2022-04-20 16:38.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.11 [info     ] FQE_20220420163808: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001627983048904774, 'time_algorithm_update': 0.004290948080462079, 'loss': 0.02382852473881009, 'time_step': 0.004527165446170541, 'init_value': -1.910672903060913, 'ave_value': -1.9572907681266467, 'soft_opc': nan} step=688




2022-04-20 16:38.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.13 [info     ] FQE_20220420163808: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016442010569017986, 'time_algorithm_update': 0.004161671150562375, 'loss': 0.027537522986422966, 'time_step': 0.004400992116262746, 'init_value': -2.6815619468688965, 'ave_value': -2.77668472784865, 'soft_opc': nan} step=1032




2022-04-20 16:38.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.14 [info     ] FQE_20220420163808: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016323563664458519, 'time_algorithm_update': 0.004505543514739635, 'loss': 0.030630116532340126, 'time_step': 0.004738809757454451, 'init_value': -3.2161149978637695, 'ave_value': -3.364063891231477, 'soft_opc': nan} step=1376




2022-04-20 16:38.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.16 [info     ] FQE_20220420163808: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001652857591939527, 'time_algorithm_update': 0.004163279089816781, 'loss': 0.03943161776852469, 'time_step': 0.004405330779940583, 'init_value': -3.877290725708008, 'ave_value': -4.09807805601541, 'soft_opc': nan} step=1720




2022-04-20 16:38.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.18 [info     ] FQE_20220420163808: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016515268835910532, 'time_algorithm_update': 0.00448796083760816, 'loss': 0.047139331238171045, 'time_step': 0.0047295038090195765, 'init_value': -4.273226737976074, 'ave_value': -4.5998419943962015, 'soft_opc': nan} step=2064




2022-04-20 16:38.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.19 [info     ] FQE_20220420163808: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016193888908208801, 'time_algorithm_update': 0.004055494485899459, 'loss': 0.05706312265626133, 'time_step': 0.004290794217309287, 'init_value': -5.027956008911133, 'ave_value': -5.437127422252754, 'soft_opc': nan} step=2408




2022-04-20 16:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.21 [info     ] FQE_20220420163808: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016479713972224745, 'time_algorithm_update': 0.004392087459564209, 'loss': 0.06836987910575645, 'time_step': 0.004633678946384164, 'init_value': -5.270750999450684, 'ave_value': -5.818658656096673, 'soft_opc': nan} step=2752




2022-04-20 16:38.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.23 [info     ] FQE_20220420163808: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015249917673510174, 'time_algorithm_update': 0.00413531550141268, 'loss': 0.07833951223251778, 'time_step': 0.0043582541998042615, 'init_value': -5.541751384735107, 'ave_value': -6.149972690722427, 'soft_opc': nan} step=3096




2022-04-20 16:38.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.24 [info     ] FQE_20220420163808: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001476136750953142, 'time_algorithm_update': 0.003825455210929693, 'loss': 0.09269806648182227, 'time_step': 0.004039121228595113, 'init_value': -6.03594446182251, 'ave_value': -6.807535144377936, 'soft_opc': nan} step=3440




2022-04-20 16:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.26 [info     ] FQE_20220420163808: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00014208569083102915, 'time_algorithm_update': 0.004010351591332014, 'loss': 0.10535196337285777, 'time_step': 0.004214712353639825, 'init_value': -6.479446887969971, 'ave_value': -7.351099508242296, 'soft_opc': nan} step=3784




2022-04-20 16:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.27 [info     ] FQE_20220420163808: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00014835804007774176, 'time_algorithm_update': 0.0037941281185593714, 'loss': 0.12187910291128033, 'time_step': 0.004009354253147923, 'init_value': -6.864853382110596, 'ave_value': -7.854620932207048, 'soft_opc': nan} step=4128




2022-04-20 16:38.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.29 [info     ] FQE_20220420163808: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015550435975540515, 'time_algorithm_update': 0.004313634578571763, 'loss': 0.1393687209184823, 'time_step': 0.004538749539574912, 'init_value': -7.321539878845215, 'ave_value': -8.416741995275993, 'soft_opc': nan} step=4472




2022-04-20 16:38.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.31 [info     ] FQE_20220420163808: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016499743905178335, 'time_algorithm_update': 0.004160307867582454, 'loss': 0.15750702538765793, 'time_step': 0.004399619130201118, 'init_value': -7.800848007202148, 'ave_value': -8.969373599533359, 'soft_opc': nan} step=4816




2022-04-20 16:38.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.32 [info     ] FQE_20220420163808: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016663032908772313, 'time_algorithm_update': 0.004160402819167736, 'loss': 0.17580889349500106, 'time_step': 0.004401605489642121, 'init_value': -8.228294372558594, 'ave_value': -9.462618210027374, 'soft_opc': nan} step=5160




2022-04-20 16:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.34 [info     ] FQE_20220420163808: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016996888227240983, 'time_algorithm_update': 0.004561730595522149, 'loss': 0.1928337631952884, 'time_step': 0.004807219948879508, 'init_value': -9.001062393188477, 'ave_value': -10.254127148465466, 'soft_opc': nan} step=5504




2022-04-20 16:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.36 [info     ] FQE_20220420163808: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001658658648646155, 'time_algorithm_update': 0.004176346368567888, 'loss': 0.21345248086334662, 'time_step': 0.004417821418407352, 'init_value': -9.05880355834961, 'ave_value': -10.32076968294778, 'soft_opc': nan} step=5848




2022-04-20 16:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.38 [info     ] FQE_20220420163808: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016541536464247594, 'time_algorithm_update': 0.004524009172306504, 'loss': 0.23176910749333368, 'time_step': 0.004764331634654556, 'init_value': -9.41558837890625, 'ave_value': -10.725426866957312, 'soft_opc': nan} step=6192




2022-04-20 16:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.39 [info     ] FQE_20220420163808: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016735251559767613, 'time_algorithm_update': 0.0041472308857496394, 'loss': 0.2522732975600331, 'time_step': 0.004389584757560907, 'init_value': -10.036288261413574, 'ave_value': -11.291578755020646, 'soft_opc': nan} step=6536




2022-04-20 16:38.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.41 [info     ] FQE_20220420163808: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001636979191802269, 'time_algorithm_update': 0.0044256892315177035, 'loss': 0.2750886624332431, 'time_step': 0.004663217899411223, 'init_value': -10.582442283630371, 'ave_value': -11.805337592061957, 'soft_opc': nan} step=6880




2022-04-20 16:38.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.43 [info     ] FQE_20220420163808: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001644097095312074, 'time_algorithm_update': 0.004294804362363593, 'loss': 0.29750412893156675, 'time_step': 0.004533313041509584, 'init_value': -10.753864288330078, 'ave_value': -11.956555540170095, 'soft_opc': nan} step=7224




2022-04-20 16:38.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.44 [info     ] FQE_20220420163808: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001678466796875, 'time_algorithm_update': 0.004169766293015591, 'loss': 0.31014043668306673, 'time_step': 0.004410580147144406, 'init_value': -10.899190902709961, 'ave_value': -11.963939248211682, 'soft_opc': nan} step=7568




2022-04-20 16:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.46 [info     ] FQE_20220420163808: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001659552718317786, 'time_algorithm_update': 0.004554043675577918, 'loss': 0.3313281230877565, 'time_step': 0.004794027916220731, 'init_value': -11.370856285095215, 'ave_value': -12.306022825633828, 'soft_opc': nan} step=7912




2022-04-20 16:38.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.48 [info     ] FQE_20220420163808: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.000163995249326839, 'time_algorithm_update': 0.004102787999219672, 'loss': 0.34038936566604777, 'time_step': 0.004343353038610414, 'init_value': -11.938901901245117, 'ave_value': -12.764392632503206, 'soft_opc': nan} step=8256




2022-04-20 16:38.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.49 [info     ] FQE_20220420163808: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001660134903220243, 'time_algorithm_update': 0.004549221937046494, 'loss': 0.36288249893872027, 'time_step': 0.004791230656379877, 'init_value': -12.409120559692383, 'ave_value': -13.126344872957938, 'soft_opc': nan} step=8600




2022-04-20 16:38.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.51 [info     ] FQE_20220420163808: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016527536303498024, 'time_algorithm_update': 0.004109042328457499, 'loss': 0.384128445575317, 'time_step': 0.0043482267579367, 'init_value': -13.000706672668457, 'ave_value': -13.630115726235248, 'soft_opc': nan} step=8944




2022-04-20 16:38.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.53 [info     ] FQE_20220420163808: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016566833784413892, 'time_algorithm_update': 0.004383150228234225, 'loss': 0.3891781486928203, 'time_step': 0.004625596279321715, 'init_value': -13.205020904541016, 'ave_value': -13.591321807958423, 'soft_opc': nan} step=9288




2022-04-20 16:38.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.54 [info     ] FQE_20220420163808: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001648394174354021, 'time_algorithm_update': 0.004440446925717731, 'loss': 0.40366409387612756, 'time_step': 0.004681673853896385, 'init_value': -13.895123481750488, 'ave_value': -13.997029449747243, 'soft_opc': nan} step=9632




2022-04-20 16:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.56 [info     ] FQE_20220420163808: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001669976600380831, 'time_algorithm_update': 0.004176394190899161, 'loss': 0.42485014954581857, 'time_step': 0.004419753717821698, 'init_value': -14.489347457885742, 'ave_value': -14.35123910145789, 'soft_opc': nan} step=9976




2022-04-20 16:38.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.58 [info     ] FQE_20220420163808: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015544891357421875, 'time_algorithm_update': 0.004280871430108714, 'loss': 0.44259428949268576, 'time_step': 0.0045074286848999735, 'init_value': -14.894620895385742, 'ave_value': -14.585100168505525, 'soft_opc': nan} step=10320




2022-04-20 16:38.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:38.59 [info     ] FQE_20220420163808: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015173540558925894, 'time_algorithm_update': 0.0038187829561011737, 'loss': 0.4511087964245573, 'time_step': 0.004039918267449667, 'init_value': -15.256007194519043, 'ave_value': -14.795106990612794, 'soft_opc': nan} step=10664




2022-04-20 16:38.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.01 [info     ] FQE_20220420163808: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00014434096425078636, 'time_algorithm_update': 0.003976686056270156, 'loss': 0.46946105199086285, 'time_step': 0.004189274338788764, 'init_value': -16.16838836669922, 'ave_value': -15.336206211973934, 'soft_opc': nan} step=11008




2022-04-20 16:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.02 [info     ] FQE_20220420163808: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015169243479883947, 'time_algorithm_update': 0.003960038340368936, 'loss': 0.48365625765087994, 'time_step': 0.004182227822237237, 'init_value': -16.39032745361328, 'ave_value': -15.308842672534373, 'soft_opc': nan} step=11352




2022-04-20 16:39.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.04 [info     ] FQE_20220420163808: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016434456026831338, 'time_algorithm_update': 0.004108765097551568, 'loss': 0.4937468524697388, 'time_step': 0.004349359246187432, 'init_value': -17.131885528564453, 'ave_value': -15.958182811630977, 'soft_opc': nan} step=11696




2022-04-20 16:39.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.06 [info     ] FQE_20220420163808: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001675888549449832, 'time_algorithm_update': 0.004522019347479177, 'loss': 0.5120485574061181, 'time_step': 0.004767878111018691, 'init_value': -17.706212997436523, 'ave_value': -16.256624107508237, 'soft_opc': nan} step=12040




2022-04-20 16:39.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.07 [info     ] FQE_20220420163808: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016058669533840445, 'time_algorithm_update': 0.004104687724002572, 'loss': 0.5231671973380672, 'time_step': 0.0043405183525972585, 'init_value': -18.13253402709961, 'ave_value': -16.424893848759307, 'soft_opc': nan} step=12384




2022-04-20 16:39.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.09 [info     ] FQE_20220420163808: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.000164244064064913, 'time_algorithm_update': 0.004552966633508372, 'loss': 0.5267897059891893, 'time_step': 0.004792216212250466, 'init_value': -18.113815307617188, 'ave_value': -16.43050950871731, 'soft_opc': nan} step=12728




2022-04-20 16:39.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.11 [info     ] FQE_20220420163808: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016292652418447095, 'time_algorithm_update': 0.004086897816768912, 'loss': 0.5256986625026911, 'time_step': 0.0043254439220872035, 'init_value': -18.393470764160156, 'ave_value': -16.49269455621986, 'soft_opc': nan} step=13072




2022-04-20 16:39.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.12 [info     ] FQE_20220420163808: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017051918562068495, 'time_algorithm_update': 0.00445479293202245, 'loss': 0.5289091753264412, 'time_step': 0.004704545403635779, 'init_value': -18.55539321899414, 'ave_value': -16.35439115025917, 'soft_opc': nan} step=13416




2022-04-20 16:39.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.14 [info     ] FQE_20220420163808: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016407633936682412, 'time_algorithm_update': 0.004230821548506271, 'loss': 0.5315755918448748, 'time_step': 0.004472684028536775, 'init_value': -18.726619720458984, 'ave_value': -16.429676417684863, 'soft_opc': nan} step=13760




2022-04-20 16:39.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.16 [info     ] FQE_20220420163808: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001637256422708201, 'time_algorithm_update': 0.0040578149085821105, 'loss': 0.5411668828591097, 'time_step': 0.004294608914574912, 'init_value': -18.955934524536133, 'ave_value': -16.534082647211335, 'soft_opc': nan} step=14104




2022-04-20 16:39.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.18 [info     ] FQE_20220420163808: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016780856043793435, 'time_algorithm_update': 0.004566504511722299, 'loss': 0.5390376847962913, 'time_step': 0.004810493352801301, 'init_value': -19.139904022216797, 'ave_value': -16.69266408863729, 'soft_opc': nan} step=14448




2022-04-20 16:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.19 [info     ] FQE_20220420163808: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016462872194689373, 'time_algorithm_update': 0.0040884925875552865, 'loss': 0.5367424074820307, 'time_step': 0.004332125186920166, 'init_value': -19.363035202026367, 'ave_value': -16.84048313697305, 'soft_opc': nan} step=14792




2022-04-20 16:39.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.21 [info     ] FQE_20220420163808: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016656379367029943, 'time_algorithm_update': 0.004541022833003555, 'loss': 0.5463671253853326, 'time_step': 0.004782859669175259, 'init_value': -19.701412200927734, 'ave_value': -17.056116333372287, 'soft_opc': nan} step=15136




2022-04-20 16:39.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.23 [info     ] FQE_20220420163808: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.000161943047545677, 'time_algorithm_update': 0.004051202951475631, 'loss': 0.5536521839053739, 'time_step': 0.004286382087441378, 'init_value': -19.873397827148438, 'ave_value': -17.0296563892361, 'soft_opc': nan} step=15480




2022-04-20 16:39.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.24 [info     ] FQE_20220420163808: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016550546468690385, 'time_algorithm_update': 0.004352977109509845, 'loss': 0.5574009496567034, 'time_step': 0.004592971053234366, 'init_value': -19.885072708129883, 'ave_value': -17.109969707102376, 'soft_opc': nan} step=15824




2022-04-20 16:39.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.26 [info     ] FQE_20220420163808: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016576398250668547, 'time_algorithm_update': 0.004371609105620273, 'loss': 0.5565502026963026, 'time_step': 0.004613196433976639, 'init_value': -20.59149932861328, 'ave_value': -17.657367588801158, 'soft_opc': nan} step=16168




2022-04-20 16:39.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.28 [info     ] FQE_20220420163808: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016506258831467738, 'time_algorithm_update': 0.004087770401045333, 'loss': 0.5723418096827646, 'time_step': 0.0043293812940287035, 'init_value': -20.19839096069336, 'ave_value': -17.298337183650457, 'soft_opc': nan} step=16512




2022-04-20 16:39.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.29 [info     ] FQE_20220420163808: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016607170881227006, 'time_algorithm_update': 0.004502686650254006, 'loss': 0.5663562112509511, 'time_step': 0.0047469894553339755, 'init_value': -20.223445892333984, 'ave_value': -17.398967694766455, 'soft_opc': nan} step=16856




2022-04-20 16:39.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:39.31 [info     ] FQE_20220420163808: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001664910205574923, 'time_algorithm_update': 0.004142986480579819, 'loss': 0.5674895918332473, 'time_step': 0.004386047291201215, 'init_value': -20.637847900390625, 'ave_value': -17.85998600317639, 'soft_opc': nan} step=17200




2022-04-20 16:39.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420163808/model_17200.pt
most optimal hyper params for cql at this point:  [0.004940147045651946, 0.0006002394062648449, 7.204121497423649e-05, 3]
search iteration:  8
using hyper params:  [0.006471994412934196, 0.0032353506503543595, 3.5629422458107715e-05, 7]
2022-04-20 16:39.31 [debug    ] RoundIterator is selected.
2022-04-20 16:39.31 [info     ] Directory is created at d3rlpy_logs/CQL_20220420163931
2022-04-20 16:39.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:39.31 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:39.31 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420163931/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.40 [info     ] CQL_20220420163931: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003868745781524837, 'time_algorithm_update': 0.025492960946601733, 'temp_loss': 4.441996422427439, 'temp': 0.994111479548683, 'alpha_loss': -18.626742477305452, 'alpha': 1.0178201742339552, 'critic_loss': 67.70455448529874, 'actor_loss': 5.19676905626442, 'time_step': 0.025978498291551022, 'td_error': 6.234255456549054, 'init_value': -9.513201713562012, 'ave_value': -7.574338862365027} step=342
2022-04-20 16:39.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.50 [info     ] CQL_20220420163931: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00038762050762511136, 'time_algorithm_update': 0.025249186315034564, 'temp_loss': 4.339002040394566, 'temp': 0.9822443619109037, 'alpha_loss': -10.824377379222224, 'alpha': 1.0467263324218883, 'critic_loss': 35.350081231858994, 'actor_loss': 11.130242786909404, 'time_step': 0.025735551850837573, 'td_error': 5.1393129298437366, 'init_value': -21.633657455444336, 'ave_value': -14.035593395242048} step=684
2022-04-20 16:39.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:39.59 [info     ] CQL_20220420163931: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003921825286240606, 'time_algorithm_update': 0.02550480588834885, 'temp_loss': 3.8219592933766324, 'temp': 0.9710644412110423, 'alpha_loss': -6.803712821146201, 'alpha': 1.06742472014232, 'critic_loss': 41.09815819500483, 'actor_loss': 18.95121877792983, 'time_step': 0.025994740034404554, 'td_error': 8.135462633095237, 'init_value': -32.41679382324219, 'ave_value': -20.61713286299292} step=1026
2022-04-20 16:39.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.08 [info     ] CQL_20220420163931: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003861335285922937, 'time_algorithm_update': 0.025336922260752896, 'temp_loss': 3.4233948529115197, 'temp': 0.9607636925072698, 'alpha_loss': -4.914652129362898, 'alpha': 1.0855841709856402, 'critic_loss': 52.16624239871376, 'actor_loss': 26.285869297228363, 'time_step': 0.025822513981869345, 'td_error': 11.939189226600892, 'init_value': -42.623374938964844, 'ave_value': -26.628933485670537} step=1368
2022-04-20 16:40.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.17 [info     ] CQL_20220420163931: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003872733367116828, 'time_algorithm_update': 0.024872634145948622, 'temp_loss': 3.108825453540735, 'temp': 0.9509279389827572, 'alpha_loss': -3.5214986351498387, 'alpha': 1.101872282070026, 'critic_loss': 66.72101047582794, 'actor_loss': 32.92431911111575, 'time_step': 0.025361487739964536, 'td_error': 15.881055027019748, 'init_value': -51.635658264160156, 'ave_value': -32.44204288225151} step=1710
2022-04-20 16:40.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.26 [info     ] CQL_20220420163931: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00038935078514946834, 'time_algorithm_update': 0.024075017337910613, 'temp_loss': 2.8319342728943853, 'temp': 0.9414565871682083, 'alpha_loss': -2.231431631796193, 'alpha': 1.1153491676202294, 'critic_loss': 84.22907949748792, 'actor_loss': 38.895290871112664, 'time_step': 0.024564047305904634, 'td_error': 20.41343625646942, 'init_value': -60.149200439453125, 'ave_value': -37.698953313252275} step=2052
2022-04-20 16:40.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.35 [info     ] CQL_20220420163931: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00038696450796740794, 'time_algorithm_update': 0.02393456230386656, 'temp_loss': 2.582354035293847, 'temp': 0.9322925304111681, 'alpha_loss': -0.9900957419317793, 'alpha': 1.1244451030653122, 'critic_loss': 101.91239099335252, 'actor_loss': 44.23573224028649, 'time_step': 0.024420480979116338, 'td_error': 24.743744578276036, 'init_value': -67.18194580078125, 'ave_value': -42.100577344001685} step=2394
2022-04-20 16:40.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.44 [info     ] CQL_20220420163931: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00038350883283113176, 'time_algorithm_update': 0.02403310307285242, 'temp_loss': 2.379532618132251, 'temp': 0.923399674787856, 'alpha_loss': 0.0568652539248922, 'alpha': 1.1273233329343517, 'critic_loss': 118.70938712672184, 'actor_loss': 48.99275487487079, 'time_step': 0.024513952216209722, 'td_error': 28.95858397821462, 'init_value': -73.60124206542969, 'ave_value': -46.01230402725371} step=2736
2022-04-20 16:40.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:40.52 [info     ] CQL_20220420163931: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00039181374666983623, 'time_algorithm_update': 0.02398073603535256, 'temp_loss': 2.216783692962245, 'temp': 0.9145669924934008, 'alpha_loss': 0.9081170330838809, 'alpha': 1.1235260102483962, 'critic_loss': 135.50054008082338, 'actor_loss': 53.30602484139783, 'time_step': 0.02447260402099431, 'td_error': 32.37858218533607, 'init_value': -78.5352783203125, 'ave_value': -48.996158761782155} step=3078
2022-04-20 16:40.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.01 [info     ] CQL_20220420163931: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003835701803017778, 'time_algorithm_update': 0.024170022261770147, 'temp_loss': 2.0657162774376006, 'temp': 0.905898046946665, 'alpha_loss': 1.7018584773871688, 'alpha': 1.1108764831782783, 'critic_loss': 151.94267757036533, 'actor_loss': 57.149441111157515, 'time_step': 0.024652360475551315, 'td_error': 36.30123658648018, 'init_value': -85.470458984375, 'ave_value': -53.015997911301014} step=3420
2022-04-20 16:41.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.10 [info     ] CQL_20220420163931: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003884347558718676, 'time_algorithm_update': 0.02358627877040216, 'temp_loss': 1.919779341820388, 'temp': 0.8973171060893967, 'alpha_loss': 2.3379313796595262, 'alpha': 1.0891215857009442, 'critic_loss': 166.95511919434307, 'actor_loss': 60.65600970753452, 'time_step': 0.02407376808032655, 'td_error': 39.61471089336172, 'init_value': -89.16165924072266, 'ave_value': -55.26641320028418} step=3762
2022-04-20 16:41.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.19 [info     ] CQL_20220420163931: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003859822513067234, 'time_algorithm_update': 0.024083730770133392, 'temp_loss': 1.804912789523253, 'temp': 0.8887890678400184, 'alpha_loss': 2.867037326337127, 'alpha': 1.0597969701415615, 'critic_loss': 181.85374022366707, 'actor_loss': 63.778113973071, 'time_step': 0.02456653118133545, 'td_error': 42.809569393616094, 'init_value': -92.46260070800781, 'ave_value': -58.969568849202616} step=4104
2022-04-20 16:41.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.27 [info     ] CQL_20220420163931: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00038682020198532014, 'time_algorithm_update': 0.024125266493412487, 'temp_loss': 1.6603453473040932, 'temp': 0.8804169570841984, 'alpha_loss': 3.350414971510569, 'alpha': 1.0252983793180588, 'critic_loss': 194.70831187287268, 'actor_loss': 66.50424125180606, 'time_step': 0.02460809966974091, 'td_error': 44.69365200887192, 'init_value': -98.02783203125, 'ave_value': -61.62200038154651} step=4446
2022-04-20 16:41.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.36 [info     ] CQL_20220420163931: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003924334955494306, 'time_algorithm_update': 0.02427244604679576, 'temp_loss': 1.546974559973555, 'temp': 0.8721931961893338, 'alpha_loss': 3.7026893977533306, 'alpha': 0.9885615263766016, 'critic_loss': 206.16135196797333, 'actor_loss': 69.14978958709895, 'time_step': 0.02476247569970917, 'td_error': 45.46011644720702, 'init_value': -98.37338256835938, 'ave_value': -61.884647077119965} step=4788
2022-04-20 16:41.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.45 [info     ] CQL_20220420163931: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00038581981993558116, 'time_algorithm_update': 0.024050714676840265, 'temp_loss': 1.42458581959295, 'temp': 0.8641075786442785, 'alpha_loss': 4.053490326233339, 'alpha': 0.9511541243185077, 'critic_loss': 215.52909690455385, 'actor_loss': 71.3995378282335, 'time_step': 0.024535777276022394, 'td_error': 47.65080677069006, 'init_value': -102.27775573730469, 'ave_value': -64.03012119329057} step=5130
2022-04-20 16:41.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:41.54 [info     ] CQL_20220420163931: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00038835528301216704, 'time_algorithm_update': 0.02429435127659848, 'temp_loss': 1.335879793118315, 'temp': 0.8560927806541934, 'alpha_loss': 4.310952949942204, 'alpha': 0.9152632641513445, 'critic_loss': 225.3670843024003, 'actor_loss': 73.53873812803748, 'time_step': 0.024780177233511943, 'td_error': 48.97597435173717, 'init_value': -104.58869934082031, 'ave_value': -66.160554618622} step=5472
2022-04-20 16:41.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.03 [info     ] CQL_20220420163931: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00039136688611660786, 'time_algorithm_update': 0.024237290460463853, 'temp_loss': 1.2501265910285257, 'temp': 0.8481219464923904, 'alpha_loss': 4.5374985197831315, 'alpha': 0.8803606845482052, 'critic_loss': 233.558670133178, 'actor_loss': 75.50400409363864, 'time_step': 0.02472366924174348, 'td_error': 50.87792077635398, 'init_value': -107.78056335449219, 'ave_value': -68.14541309565843} step=5814
2022-04-20 16:42.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.12 [info     ] CQL_20220420163931: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003821159663953279, 'time_algorithm_update': 0.02400385566622193, 'temp_loss': 1.146984126484185, 'temp': 0.840229474661643, 'alpha_loss': 4.7317397970902295, 'alpha': 0.8477410088505661, 'critic_loss': 242.20748031348512, 'actor_loss': 77.23932441354495, 'time_step': 0.024485342683847885, 'td_error': 53.00990524118223, 'init_value': -110.74638366699219, 'ave_value': -70.56380107082023} step=6156
2022-04-20 16:42.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.20 [info     ] CQL_20220420163931: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.000388070853830081, 'time_algorithm_update': 0.024280818582278246, 'temp_loss': 1.088125700316234, 'temp': 0.8325502164879738, 'alpha_loss': 4.931261519939579, 'alpha': 0.8158849946588104, 'critic_loss': 249.89369696901556, 'actor_loss': 78.88777912429899, 'time_step': 0.02476771951418871, 'td_error': 53.5212090134406, 'init_value': -113.6240463256836, 'ave_value': -70.9609451436144} step=6498
2022-04-20 16:42.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.29 [info     ] CQL_20220420163931: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00038336104119730276, 'time_algorithm_update': 0.023924694423787078, 'temp_loss': 1.0192524654823436, 'temp': 0.8247579949641088, 'alpha_loss': 4.992672430841546, 'alpha': 0.786117150421031, 'critic_loss': 260.6056262903046, 'actor_loss': 80.5451654690748, 'time_step': 0.024405009565297623, 'td_error': 52.40668806161025, 'init_value': -113.76472473144531, 'ave_value': -72.32597852258398} step=6840
2022-04-20 16:42.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.38 [info     ] CQL_20220420163931: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00038980949691861695, 'time_algorithm_update': 0.024132672806232297, 'temp_loss': 0.9536653174120083, 'temp': 0.8170358524685017, 'alpha_loss': 5.066547247401455, 'alpha': 0.7578094230409254, 'critic_loss': 271.1540849919905, 'actor_loss': 81.82726561116894, 'time_step': 0.024623210667169583, 'td_error': 55.20818596795677, 'init_value': -117.27946472167969, 'ave_value': -74.08002107638228} step=7182
2022-04-20 16:42.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.47 [info     ] CQL_20220420163931: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00039173427381013567, 'time_algorithm_update': 0.02506575946919402, 'temp_loss': 0.8790760921281681, 'temp': 0.8093855448982172, 'alpha_loss': 5.111650462736163, 'alpha': 0.7313881953905897, 'critic_loss': 280.44297821758784, 'actor_loss': 83.2392920555427, 'time_step': 0.025556326609605936, 'td_error': 55.06230472871895, 'init_value': -119.709716796875, 'ave_value': -75.5808200374051} step=7524
2022-04-20 16:42.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:42.57 [info     ] CQL_20220420163931: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00038909284692061573, 'time_algorithm_update': 0.025638412313851697, 'temp_loss': 0.8336604011860508, 'temp': 0.801930124299568, 'alpha_loss': 5.080692766702663, 'alpha': 0.7058572310801835, 'critic_loss': 291.18322896678546, 'actor_loss': 84.44184888873184, 'time_step': 0.02612457707611441, 'td_error': 54.66304942452012, 'init_value': -122.0446548461914, 'ave_value': -76.01619413019502} step=7866
2022-04-20 16:42.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.06 [info     ] CQL_20220420163931: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003939330229285168, 'time_algorithm_update': 0.02580028946636713, 'temp_loss': 0.7552182079232924, 'temp': 0.7945359206339072, 'alpha_loss': 5.111779885682446, 'alpha': 0.6819994358988534, 'critic_loss': 299.75199229814854, 'actor_loss': 85.46132626449852, 'time_step': 0.026294831644024765, 'td_error': 54.146096933637516, 'init_value': -121.10379791259766, 'ave_value': -77.28807364636243} step=8208
2022-04-20 16:43.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.15 [info     ] CQL_20220420163931: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003953461061444199, 'time_algorithm_update': 0.02574865232434189, 'temp_loss': 0.7013110199082665, 'temp': 0.7873294979159595, 'alpha_loss': 4.942241951736094, 'alpha': 0.6589877929603845, 'critic_loss': 308.1829964710258, 'actor_loss': 86.43642117684348, 'time_step': 0.026243955071209468, 'td_error': 53.45762007281916, 'init_value': -122.4549331665039, 'ave_value': -77.84244287740674} step=8550
2022-04-20 16:43.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.25 [info     ] CQL_20220420163931: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003955343313384474, 'time_algorithm_update': 0.025621442766914592, 'temp_loss': 0.6257767885574821, 'temp': 0.7804184508951086, 'alpha_loss': 4.884412313762464, 'alpha': 0.6374907214739169, 'critic_loss': 314.9437532480697, 'actor_loss': 87.23834099128233, 'time_step': 0.02611818160229956, 'td_error': 51.95848782281278, 'init_value': -122.04988098144531, 'ave_value': -77.35179381103934} step=8892
2022-04-20 16:43.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.34 [info     ] CQL_20220420163931: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00039336277030364814, 'time_algorithm_update': 0.025619973913270828, 'temp_loss': 0.5585992115587868, 'temp': 0.7737458459815086, 'alpha_loss': 4.815645868318123, 'alpha': 0.6167469737473984, 'critic_loss': 320.2601704290736, 'actor_loss': 87.81185147915667, 'time_step': 0.026111372730188202, 'td_error': 52.20449166760074, 'init_value': -123.02115631103516, 'ave_value': -78.84157643483431} step=9234
2022-04-20 16:43.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.43 [info     ] CQL_20220420163931: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003908022105345252, 'time_algorithm_update': 0.02554136479807179, 'temp_loss': 0.5119771095243288, 'temp': 0.7673315085165682, 'alpha_loss': 4.746843118416636, 'alpha': 0.5967584689806776, 'critic_loss': 326.3798817863241, 'actor_loss': 88.31417880141944, 'time_step': 0.026030771216453863, 'td_error': 51.978593242609996, 'init_value': -124.7860107421875, 'ave_value': -79.41314733586064} step=9576
2022-04-20 16:43.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:43.53 [info     ] CQL_20220420163931: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00039157951087282416, 'time_algorithm_update': 0.02569164588437443, 'temp_loss': 0.44244201624580815, 'temp': 0.7612249924425494, 'alpha_loss': 4.645732282895094, 'alpha': 0.5775692788829581, 'critic_loss': 330.5951677735089, 'actor_loss': 88.65370436997442, 'time_step': 0.02618518558859128, 'td_error': 50.62148047863918, 'init_value': -124.6702651977539, 'ave_value': -78.78541379979333} step=9918
2022-04-20 16:43.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.02 [info     ] CQL_20220420163931: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003987620448508458, 'time_algorithm_update': 0.02540941336001569, 'temp_loss': 0.441656046487086, 'temp': 0.7551194678621682, 'alpha_loss': 4.5665260887982555, 'alpha': 0.5592041369418652, 'critic_loss': 335.0295171012655, 'actor_loss': 88.98015344630905, 'time_step': 0.025904919668944957, 'td_error': 49.632148599928875, 'init_value': -124.02406311035156, 'ave_value': -79.8535827723904} step=10260
2022-04-20 16:44.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.12 [info     ] CQL_20220420163931: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00039721720399912337, 'time_algorithm_update': 0.025786360104878742, 'temp_loss': 0.4113998483234679, 'temp': 0.748766289119832, 'alpha_loss': 4.404618992442973, 'alpha': 0.5416015911172007, 'critic_loss': 339.0885386327554, 'actor_loss': 89.24391537939596, 'time_step': 0.026284944941425883, 'td_error': 49.2672730821407, 'init_value': -123.90409851074219, 'ave_value': -79.3057389671765} step=10602
2022-04-20 16:44.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.21 [info     ] CQL_20220420163931: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003941142768190618, 'time_algorithm_update': 0.025554514070700484, 'temp_loss': 0.3853774977227052, 'temp': 0.7423977100709725, 'alpha_loss': 4.314757155396088, 'alpha': 0.5246722346160844, 'critic_loss': 342.21837352730375, 'actor_loss': 89.47642242699338, 'time_step': 0.026049290484155132, 'td_error': 48.14492173081198, 'init_value': -123.41777038574219, 'ave_value': -79.56391712001987} step=10944
2022-04-20 16:44.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.30 [info     ] CQL_20220420163931: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003979464023433931, 'time_algorithm_update': 0.02580216265561288, 'temp_loss': 0.32601742232381775, 'temp': 0.7364411212896046, 'alpha_loss': 4.18407027972372, 'alpha': 0.5083651896457226, 'critic_loss': 344.23965079324284, 'actor_loss': 89.54089596396999, 'time_step': 0.0263005941234834, 'td_error': 47.306531110958694, 'init_value': -120.9005126953125, 'ave_value': -80.13445125079538} step=11286
2022-04-20 16:44.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.40 [info     ] CQL_20220420163931: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00038633290787189325, 'time_algorithm_update': 0.0255889286074722, 'temp_loss': 0.30222036358382964, 'temp': 0.7307138889156587, 'alpha_loss': 4.093848710868791, 'alpha': 0.49275097913212246, 'critic_loss': 346.3116854840552, 'actor_loss': 89.56672655072128, 'time_step': 0.026075520013508043, 'td_error': 45.859939191864605, 'init_value': -119.13607025146484, 'ave_value': -79.12292303248837} step=11628
2022-04-20 16:44.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.49 [info     ] CQL_20220420163931: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003915578998320284, 'time_algorithm_update': 0.025729247701098346, 'temp_loss': 0.2537086008944445, 'temp': 0.7255955477904158, 'alpha_loss': 3.914538480385005, 'alpha': 0.47753628565554035, 'critic_loss': 347.3379652235243, 'actor_loss': 89.56999543396353, 'time_step': 0.02622125650707044, 'td_error': 47.15524655905922, 'init_value': -121.0965576171875, 'ave_value': -80.82229040578814} step=11970
2022-04-20 16:44.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:44.58 [info     ] CQL_20220420163931: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003927248960349992, 'time_algorithm_update': 0.025714603781003004, 'temp_loss': 0.25933114945213176, 'temp': 0.7200798453294743, 'alpha_loss': 3.844203725892898, 'alpha': 0.46298814107451525, 'critic_loss': 351.02782471416987, 'actor_loss': 89.65477302060489, 'time_step': 0.026209497312356157, 'td_error': 45.53549632728572, 'init_value': -120.70430755615234, 'ave_value': -80.35947933520954} step=12312
2022-04-20 16:44.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.08 [info     ] CQL_20220420163931: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00038843963578430533, 'time_algorithm_update': 0.025665901557743898, 'temp_loss': 0.2499319427096617, 'temp': 0.7149419162357062, 'alpha_loss': 3.711109729886752, 'alpha': 0.4487755539124472, 'critic_loss': 352.79912080820543, 'actor_loss': 89.66818464848033, 'time_step': 0.026154852750008565, 'td_error': 46.394750887247824, 'init_value': -120.0999984741211, 'ave_value': -80.87257793021463} step=12654
2022-04-20 16:45.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.17 [info     ] CQL_20220420163931: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00039118632935641103, 'time_algorithm_update': 0.025700946997480784, 'temp_loss': 0.25798013196354025, 'temp': 0.7090536049234937, 'alpha_loss': 3.614574403442137, 'alpha': 0.4351850118901994, 'critic_loss': 355.05302143654626, 'actor_loss': 89.68754381324813, 'time_step': 0.02619041336907281, 'td_error': 44.7841475523624, 'init_value': -117.62532806396484, 'ave_value': -80.002115619341} step=12996
2022-04-20 16:45.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.27 [info     ] CQL_20220420163931: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00039412612803498204, 'time_algorithm_update': 0.025523332127353603, 'temp_loss': 0.2355670452891299, 'temp': 0.7033568782764569, 'alpha_loss': 3.4958217924798443, 'alpha': 0.42201137638580033, 'critic_loss': 356.4390242727179, 'actor_loss': 89.6762365151567, 'time_step': 0.026016139147574443, 'td_error': 44.76020860893872, 'init_value': -121.09233093261719, 'ave_value': -80.62274242696361} step=13338
2022-04-20 16:45.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.36 [info     ] CQL_20220420163931: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00038807643087286696, 'time_algorithm_update': 0.02561165017691272, 'temp_loss': 0.17524490375289617, 'temp': 0.6984514480794383, 'alpha_loss': 3.3906266821755304, 'alpha': 0.4091740850119563, 'critic_loss': 357.87409999914337, 'actor_loss': 89.59998464305498, 'time_step': 0.026097886743601303, 'td_error': 46.83293920723017, 'init_value': -121.46781158447266, 'ave_value': -81.03348298172492} step=13680
2022-04-20 16:45.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.45 [info     ] CQL_20220420163931: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003891123665703668, 'time_algorithm_update': 0.02500546536250421, 'temp_loss': 0.19190109700334873, 'temp': 0.6932938966834754, 'alpha_loss': 3.257104938838914, 'alpha': 0.3968595284641835, 'critic_loss': 357.59963587710735, 'actor_loss': 89.47507992125394, 'time_step': 0.025492965826514172, 'td_error': 44.950047707600966, 'init_value': -117.15620422363281, 'ave_value': -81.07742794401977} step=14022
2022-04-20 16:45.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:45.54 [info     ] CQL_20220420163931: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00038871918505395365, 'time_algorithm_update': 0.02406324280632867, 'temp_loss': 0.1898790821378604, 'temp': 0.6885116048026503, 'alpha_loss': 3.1387329066705982, 'alpha': 0.38496332088409113, 'critic_loss': 359.28326603404264, 'actor_loss': 89.46567435013621, 'time_step': 0.024551109960901808, 'td_error': 45.53052935082613, 'init_value': -115.3544692993164, 'ave_value': -79.44428444084853} step=14364
2022-04-20 16:45.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.03 [info     ] CQL_20220420163931: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003867267865186546, 'time_algorithm_update': 0.023952023327699183, 'temp_loss': 0.19426353836747987, 'temp': 0.6833032965660095, 'alpha_loss': 3.0558081433089854, 'alpha': 0.3734481731179165, 'critic_loss': 360.06259744209154, 'actor_loss': 89.36205670986956, 'time_step': 0.024435762076349985, 'td_error': 44.54015506368195, 'init_value': -117.14505767822266, 'ave_value': -80.03576387951705} step=14706
2022-04-20 16:46.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.11 [info     ] CQL_20220420163931: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00038981507396140293, 'time_algorithm_update': 0.0241085269994903, 'temp_loss': 0.20544007514761378, 'temp': 0.677859955538086, 'alpha_loss': 2.93874725786566, 'alpha': 0.3622800902142162, 'critic_loss': 360.2203073780439, 'actor_loss': 89.31239495082208, 'time_step': 0.024596597716125133, 'td_error': 42.59402057593368, 'init_value': -112.68099212646484, 'ave_value': -78.80334049474146} step=15048
2022-04-20 16:46.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.20 [info     ] CQL_20220420163931: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00038972932692856816, 'time_algorithm_update': 0.02405715058421531, 'temp_loss': 0.19321728767336985, 'temp': 0.6723727165606984, 'alpha_loss': 2.8119489205510995, 'alpha': 0.3514715883641215, 'critic_loss': 359.9125530399077, 'actor_loss': 89.21970760055453, 'time_step': 0.024547065907751606, 'td_error': 42.005954292511, 'init_value': -112.91487121582031, 'ave_value': -79.2370831152988} step=15390
2022-04-20 16:46.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.29 [info     ] CQL_20220420163931: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003859627316569724, 'time_algorithm_update': 0.02408397894853737, 'temp_loss': 0.153526596049642, 'temp': 0.6677930513669176, 'alpha_loss': 2.6996251104170814, 'alpha': 0.3410794603371481, 'critic_loss': 358.24819821363303, 'actor_loss': 89.07622420996951, 'time_step': 0.02456637084135535, 'td_error': 41.66530834037116, 'init_value': -112.92667388916016, 'ave_value': -79.1020733472329} step=15732
2022-04-20 16:46.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.38 [info     ] CQL_20220420163931: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003841773808351037, 'time_algorithm_update': 0.024029901153162905, 'temp_loss': 0.14938416548225789, 'temp': 0.6635222344370614, 'alpha_loss': 2.6220094721916825, 'alpha': 0.3310252218044292, 'critic_loss': 357.14944627549914, 'actor_loss': 88.83829507214284, 'time_step': 0.024512430380659495, 'td_error': 44.13705490427921, 'init_value': -114.5080337524414, 'ave_value': -80.35151401989094} step=16074
2022-04-20 16:46.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.47 [info     ] CQL_20220420163931: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00038557094440125583, 'time_algorithm_update': 0.02436164805763646, 'temp_loss': 0.165901285334768, 'temp': 0.658489651847304, 'alpha_loss': 2.5171891565908466, 'alpha': 0.3212639465492371, 'critic_loss': 356.04042294150906, 'actor_loss': 88.75585716649105, 'time_step': 0.02484095445153309, 'td_error': 43.39419033650608, 'init_value': -112.3375244140625, 'ave_value': -78.25159017623511} step=16416
2022-04-20 16:46.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:46.55 [info     ] CQL_20220420163931: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00038483826040524487, 'time_algorithm_update': 0.024168568744994047, 'temp_loss': 0.1385878106818823, 'temp': 0.6538546867886482, 'alpha_loss': 2.441883441648985, 'alpha': 0.31181561964297155, 'critic_loss': 353.93443744503264, 'actor_loss': 88.51595736944188, 'time_step': 0.024649789458826968, 'td_error': 41.01592158094784, 'init_value': -111.18550109863281, 'ave_value': -78.42400227099523} step=16758
2022-04-20 16:46.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:47.04 [info     ] CQL_20220420163931: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003892629467255888, 'time_algorithm_update': 0.024193038020217626, 'temp_loss': 0.17101891520188042, 'temp': 0.6490935481082626, 'alpha_loss': 2.3317640479545148, 'alpha': 0.30255529423903305, 'critic_loss': 351.0998254073294, 'actor_loss': 88.39529927571614, 'time_step': 0.02468054127274898, 'td_error': 41.45235766264261, 'init_value': -110.8961181640625, 'ave_value': -79.13558992722113} step=17100
2022-04-20 16:47.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420163931/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:47.05 [info     ] FQE_20220420164704: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015304749270519578, 'time_algorithm_update': 0.0034324275441916592, 'loss': 0.005271199530310345, 'time_step': 0.0036572318479239224, 'init_value': -0.2896161675453186, 'ave_value': -0.25246774412121054, 'soft_opc': nan} step=166




2022-04-20 16:47.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.06 [info     ] FQE_20220420164704: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015644998435514518, 'time_algorithm_update': 0.003541231155395508, 'loss': 0.0039355025734718755, 'time_step': 0.003770519451922681, 'init_value': -0.410487562417984, 'ave_value': -0.32056350413592416, 'soft_opc': nan} step=332




2022-04-20 16:47.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.06 [info     ] FQE_20220420164704: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001546388649078737, 'time_algorithm_update': 0.0034699913967086607, 'loss': 0.003539998273656939, 'time_step': 0.003692585301686482, 'init_value': -0.481197714805603, 'ave_value': -0.36568762606915994, 'soft_opc': nan} step=498




2022-04-20 16:47.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.07 [info     ] FQE_20220420164704: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015751137790909732, 'time_algorithm_update': 0.0035350495074168743, 'loss': 0.0037275870430215925, 'time_step': 0.0037626430212733255, 'init_value': -0.5359705090522766, 'ave_value': -0.4056628699590628, 'soft_opc': nan} step=664




2022-04-20 16:47.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.08 [info     ] FQE_20220420164704: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001533045826188053, 'time_algorithm_update': 0.00338897647627865, 'loss': 0.003684319818044553, 'time_step': 0.0036066382764333702, 'init_value': -0.6109986305236816, 'ave_value': -0.45402772241011935, 'soft_opc': nan} step=830




2022-04-20 16:47.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.09 [info     ] FQE_20220420164704: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00019984072949512895, 'time_algorithm_update': 0.0048562862786902, 'loss': 0.0038113730317206086, 'time_step': 0.0051266184772353575, 'init_value': -0.6595898866653442, 'ave_value': -0.4838163410355379, 'soft_opc': nan} step=996




2022-04-20 16:47.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.09 [info     ] FQE_20220420164704: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015357603509742092, 'time_algorithm_update': 0.003583899463515684, 'loss': 0.0039033018093253204, 'time_step': 0.0038060854716473317, 'init_value': -0.6758371591567993, 'ave_value': -0.4887060361503212, 'soft_opc': nan} step=1162




2022-04-20 16:47.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.10 [info     ] FQE_20220420164704: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001568664987403226, 'time_algorithm_update': 0.0034294659832873977, 'loss': 0.0037930462024096086, 'time_step': 0.0036535061985613353, 'init_value': -0.7308381199836731, 'ave_value': -0.5263409608773686, 'soft_opc': nan} step=1328




2022-04-20 16:47.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.11 [info     ] FQE_20220420164704: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015520474996911474, 'time_algorithm_update': 0.0034712035971951774, 'loss': 0.0037090943099549376, 'time_step': 0.0037005536527518765, 'init_value': -0.7620357871055603, 'ave_value': -0.5336481294838024, 'soft_opc': nan} step=1494




2022-04-20 16:47.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.11 [info     ] FQE_20220420164704: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015909700508577278, 'time_algorithm_update': 0.00356905431632536, 'loss': 0.0036569106096615003, 'time_step': 0.0037969422627644367, 'init_value': -0.8236261010169983, 'ave_value': -0.5859544706488917, 'soft_opc': nan} step=1660




2022-04-20 16:47.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.12 [info     ] FQE_20220420164704: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015653759600168252, 'time_algorithm_update': 0.0034675985933786415, 'loss': 0.0036856488826830522, 'time_step': 0.003695024065224521, 'init_value': -0.8818728923797607, 'ave_value': -0.6260623243511528, 'soft_opc': nan} step=1826




2022-04-20 16:47.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.13 [info     ] FQE_20220420164704: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015911567642028072, 'time_algorithm_update': 0.003607135221182582, 'loss': 0.003657528890033129, 'time_step': 0.003838503217122641, 'init_value': -0.9076589345932007, 'ave_value': -0.6368297453231371, 'soft_opc': nan} step=1992




2022-04-20 16:47.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.14 [info     ] FQE_20220420164704: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.000159196106784315, 'time_algorithm_update': 0.003531351146927799, 'loss': 0.003683257165662271, 'time_step': 0.0037632893366986012, 'init_value': -0.9621599912643433, 'ave_value': -0.6705468536765726, 'soft_opc': nan} step=2158




2022-04-20 16:47.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.14 [info     ] FQE_20220420164704: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001533663416483316, 'time_algorithm_update': 0.0035047904554619848, 'loss': 0.003645844146025827, 'time_step': 0.0037262568990868257, 'init_value': -1.0588942766189575, 'ave_value': -0.744885437687238, 'soft_opc': nan} step=2324




2022-04-20 16:47.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.15 [info     ] FQE_20220420164704: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015952788203595634, 'time_algorithm_update': 0.003497774342456496, 'loss': 0.0037090294224389614, 'time_step': 0.0037314460938235364, 'init_value': -1.1151247024536133, 'ave_value': -0.7790932824080055, 'soft_opc': nan} step=2490




2022-04-20 16:47.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.16 [info     ] FQE_20220420164704: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001557088760008295, 'time_algorithm_update': 0.0035288405705647297, 'loss': 0.0037484114108809145, 'time_step': 0.0037568462900368564, 'init_value': -1.217960000038147, 'ave_value': -0.8542525537379153, 'soft_opc': nan} step=2656




2022-04-20 16:47.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.16 [info     ] FQE_20220420164704: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015607943017798733, 'time_algorithm_update': 0.0034610909151743694, 'loss': 0.0038008874672162065, 'time_step': 0.0036906650267451643, 'init_value': -1.2927727699279785, 'ave_value': -0.9041135484570856, 'soft_opc': nan} step=2822




2022-04-20 16:47.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.17 [info     ] FQE_20220420164704: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015547045742172794, 'time_algorithm_update': 0.0035401209291205347, 'loss': 0.0038050188541311368, 'time_step': 0.0037633984921926476, 'init_value': -1.3683085441589355, 'ave_value': -0.9534589037500524, 'soft_opc': nan} step=2988




2022-04-20 16:47.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.18 [info     ] FQE_20220420164704: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015469057014189572, 'time_algorithm_update': 0.003377298274672175, 'loss': 0.00403164779691654, 'time_step': 0.003604311540902379, 'init_value': -1.4535815715789795, 'ave_value': -0.9989193216033347, 'soft_opc': nan} step=3154




2022-04-20 16:47.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.18 [info     ] FQE_20220420164704: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015406579856412956, 'time_algorithm_update': 0.003650010350238846, 'loss': 0.004347349866994668, 'time_step': 0.0038778839341129163, 'init_value': -1.5065981149673462, 'ave_value': -0.9997019164465569, 'soft_opc': nan} step=3320




2022-04-20 16:47.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.19 [info     ] FQE_20220420164704: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015529810664165452, 'time_algorithm_update': 0.0033373373100556523, 'loss': 0.0045238053061472, 'time_step': 0.0035663182476916946, 'init_value': -1.5521531105041504, 'ave_value': -1.0256884739608378, 'soft_opc': nan} step=3486




2022-04-20 16:47.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.20 [info     ] FQE_20220420164704: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001567444169377706, 'time_algorithm_update': 0.0035474128033741413, 'loss': 0.00469931765811529, 'time_step': 0.0037761625037135847, 'init_value': -1.6690599918365479, 'ave_value': -1.0938541490618172, 'soft_opc': nan} step=3652




2022-04-20 16:47.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.20 [info     ] FQE_20220420164704: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015513150088758353, 'time_algorithm_update': 0.0034528640379388647, 'loss': 0.004898343864668453, 'time_step': 0.0036825343786952005, 'init_value': -1.7491589784622192, 'ave_value': -1.1415292806520656, 'soft_opc': nan} step=3818




2022-04-20 16:47.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.21 [info     ] FQE_20220420164704: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015733040959002023, 'time_algorithm_update': 0.0035609351583274014, 'loss': 0.005253224581738104, 'time_step': 0.0037911426590149662, 'init_value': -1.7941526174545288, 'ave_value': -1.1575110814123004, 'soft_opc': nan} step=3984




2022-04-20 16:47.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.22 [info     ] FQE_20220420164704: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015696416418236423, 'time_algorithm_update': 0.003405404378132648, 'loss': 0.005634649372687675, 'time_step': 0.0036318373967366047, 'init_value': -1.8928959369659424, 'ave_value': -1.2142813784112265, 'soft_opc': nan} step=4150




2022-04-20 16:47.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.22 [info     ] FQE_20220420164704: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015708480972841563, 'time_algorithm_update': 0.0035760531942528413, 'loss': 0.005666895816523403, 'time_step': 0.0038065264023930193, 'init_value': -1.9539141654968262, 'ave_value': -1.2387561331930999, 'soft_opc': nan} step=4316




2022-04-20 16:47.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.23 [info     ] FQE_20220420164704: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001534525170383683, 'time_algorithm_update': 0.003469830535980592, 'loss': 0.006136140793124326, 'time_step': 0.003698498369699501, 'init_value': -2.031139850616455, 'ave_value': -1.2796725747351712, 'soft_opc': nan} step=4482




2022-04-20 16:47.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.24 [info     ] FQE_20220420164704: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015836738678346197, 'time_algorithm_update': 0.003541226846626006, 'loss': 0.006679017952340375, 'time_step': 0.003772777247141643, 'init_value': -2.138770341873169, 'ave_value': -1.3636819019376694, 'soft_opc': nan} step=4648




2022-04-20 16:47.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.25 [info     ] FQE_20220420164704: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015961980245199548, 'time_algorithm_update': 0.003407109214598874, 'loss': 0.006608859172332135, 'time_step': 0.0036417073514088093, 'init_value': -2.2157142162323, 'ave_value': -1.3987070608045067, 'soft_opc': nan} step=4814




2022-04-20 16:47.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.25 [info     ] FQE_20220420164704: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015339506677834383, 'time_algorithm_update': 0.0036259185836975834, 'loss': 0.006916486038910562, 'time_step': 0.0038524420864610785, 'init_value': -2.309999942779541, 'ave_value': -1.4507513824145537, 'soft_opc': nan} step=4980




2022-04-20 16:47.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.26 [info     ] FQE_20220420164704: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015635519142610482, 'time_algorithm_update': 0.0033296375389558724, 'loss': 0.007408299716189504, 'time_step': 0.0035596396549638495, 'init_value': -2.42315411567688, 'ave_value': -1.5397393206822443, 'soft_opc': nan} step=5146




2022-04-20 16:47.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.27 [info     ] FQE_20220420164704: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001547250402979104, 'time_algorithm_update': 0.003489026104111269, 'loss': 0.007791611762206822, 'time_step': 0.003715780844171363, 'init_value': -2.52658748626709, 'ave_value': -1.6117328707766425, 'soft_opc': nan} step=5312




2022-04-20 16:47.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.27 [info     ] FQE_20220420164704: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015215557741831583, 'time_algorithm_update': 0.0034296469516064748, 'loss': 0.00804939862419234, 'time_step': 0.003653998834541045, 'init_value': -2.566270112991333, 'ave_value': -1.6280719004772806, 'soft_opc': nan} step=5478




2022-04-20 16:47.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.28 [info     ] FQE_20220420164704: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015817061964287814, 'time_algorithm_update': 0.003580916358763913, 'loss': 0.008575790868202472, 'time_step': 0.0038166836083653463, 'init_value': -2.6579315662384033, 'ave_value': -1.6728920331677875, 'soft_opc': nan} step=5644




2022-04-20 16:47.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.29 [info     ] FQE_20220420164704: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015666255031723575, 'time_algorithm_update': 0.003423463867371341, 'loss': 0.008659849714779531, 'time_step': 0.0036528972258050756, 'init_value': -2.6913185119628906, 'ave_value': -1.687686915970869, 'soft_opc': nan} step=5810




2022-04-20 16:47.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.29 [info     ] FQE_20220420164704: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015724997922598598, 'time_algorithm_update': 0.003529492631016007, 'loss': 0.008848325795415475, 'time_step': 0.00375753569315715, 'init_value': -2.7899997234344482, 'ave_value': -1.782439435904359, 'soft_opc': nan} step=5976




2022-04-20 16:47.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.30 [info     ] FQE_20220420164704: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015703310449439358, 'time_algorithm_update': 0.0035056852432618657, 'loss': 0.009210673425941313, 'time_step': 0.003738367413899985, 'init_value': -2.861319065093994, 'ave_value': -1.8124717438006186, 'soft_opc': nan} step=6142




2022-04-20 16:47.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.31 [info     ] FQE_20220420164704: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015616847808102527, 'time_algorithm_update': 0.0035237274974225514, 'loss': 0.009472396721956825, 'time_step': 0.0037517217268426733, 'init_value': -2.8719468116760254, 'ave_value': -1.8137483682114262, 'soft_opc': nan} step=6308




2022-04-20 16:47.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.31 [info     ] FQE_20220420164704: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015483994081795933, 'time_algorithm_update': 0.0034495549029614552, 'loss': 0.009318218285257154, 'time_step': 0.00367959867040795, 'init_value': -2.94248628616333, 'ave_value': -1.8714108752506273, 'soft_opc': nan} step=6474




2022-04-20 16:47.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.32 [info     ] FQE_20220420164704: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015720689153096763, 'time_algorithm_update': 0.0035120536045855784, 'loss': 0.009531769795618188, 'time_step': 0.003741252853209714, 'init_value': -3.011244058609009, 'ave_value': -1.8960896185523755, 'soft_opc': nan} step=6640




2022-04-20 16:47.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.33 [info     ] FQE_20220420164704: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015432719724724092, 'time_algorithm_update': 0.003465873649321407, 'loss': 0.009867691257491663, 'time_step': 0.0036932876311152815, 'init_value': -3.148256301879883, 'ave_value': -2.0156136432612266, 'soft_opc': nan} step=6806




2022-04-20 16:47.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.33 [info     ] FQE_20220420164704: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016125857111919358, 'time_algorithm_update': 0.0035425065511680513, 'loss': 0.010631215992854931, 'time_step': 0.0037827233234083795, 'init_value': -3.1527047157287598, 'ave_value': -1.9924398035549367, 'soft_opc': nan} step=6972




2022-04-20 16:47.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.34 [info     ] FQE_20220420164704: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015547476619122977, 'time_algorithm_update': 0.0033927466495927558, 'loss': 0.010859528548050537, 'time_step': 0.0036245196698659874, 'init_value': -3.2252025604248047, 'ave_value': -2.0330603270313223, 'soft_opc': nan} step=7138




2022-04-20 16:47.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.35 [info     ] FQE_20220420164704: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015302594885768662, 'time_algorithm_update': 0.003578108477305217, 'loss': 0.010714844367480892, 'time_step': 0.0038056488496711455, 'init_value': -3.298363447189331, 'ave_value': -2.097677580300752, 'soft_opc': nan} step=7304




2022-04-20 16:47.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.36 [info     ] FQE_20220420164704: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015719827399196396, 'time_algorithm_update': 0.003528273249246988, 'loss': 0.011113299836730024, 'time_step': 0.0037608161030045473, 'init_value': -3.405848741531372, 'ave_value': -2.1858725719825105, 'soft_opc': nan} step=7470




2022-04-20 16:47.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.36 [info     ] FQE_20220420164704: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015577781631285884, 'time_algorithm_update': 0.0035890886582523943, 'loss': 0.011430899128009262, 'time_step': 0.0038180983210184486, 'init_value': -3.4478511810302734, 'ave_value': -2.1733723113867076, 'soft_opc': nan} step=7636




2022-04-20 16:47.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.37 [info     ] FQE_20220420164704: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015805284660982798, 'time_algorithm_update': 0.0035249698592955806, 'loss': 0.011764454737401018, 'time_step': 0.003753986703344138, 'init_value': -3.5408239364624023, 'ave_value': -2.2317930845594085, 'soft_opc': nan} step=7802




2022-04-20 16:47.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.38 [info     ] FQE_20220420164704: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015831711780594056, 'time_algorithm_update': 0.003501662288803652, 'loss': 0.01231096343541271, 'time_step': 0.003731954528624753, 'init_value': -3.602940559387207, 'ave_value': -2.2792003973546597, 'soft_opc': nan} step=7968




2022-04-20 16:47.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.38 [info     ] FQE_20220420164704: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015438895627676723, 'time_algorithm_update': 0.0035142223519015023, 'loss': 0.012235765554381153, 'time_step': 0.003739800797887595, 'init_value': -3.6872920989990234, 'ave_value': -2.329902218517151, 'soft_opc': nan} step=8134




2022-04-20 16:47.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:47.39 [info     ] FQE_20220420164704: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015676883329828102, 'time_algorithm_update': 0.0035956652767686957, 'loss': 0.012528179621559579, 'time_step': 0.0038231553801571032, 'init_value': -3.7198638916015625, 'ave_value': -2.3273589177241734, 'soft_opc': nan} step=8300




2022-04-20 16:47.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164704/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:47.39 [info     ] Directory is created at d3rlpy_logs/FQE_20220420164739
2022-04-20 16:47.39 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:47.39 [debug    ] Building models...
2022-04-20 16:47.39 [debug    ] Models have been built.
2022-04-20 16:47.39 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420164739/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:47.41 [info     ] FQE_20220420164739: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001634844513826592, 'time_algorithm_update': 0.0034505413022152213, 'loss': 0.021911767630859516, 'time_step': 0.0036862887615381284, 'init_value': -1.1485525369644165, 'ave_value': -1.1835792174277542, 'soft_opc': nan} step=344




2022-04-20 16:47.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.42 [info     ] FQE_20220420164739: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016316355660904284, 'time_algorithm_update': 0.003470290538876556, 'loss': 0.020719698256661378, 'time_step': 0.003707362468852553, 'init_value': -1.9755418300628662, 'ave_value': -2.019739101155921, 'soft_opc': nan} step=688




2022-04-20 16:47.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.44 [info     ] FQE_20220420164739: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016543892926948014, 'time_algorithm_update': 0.0035360968390176464, 'loss': 0.023503028660427864, 'time_step': 0.0037738355093224103, 'init_value': -3.0305445194244385, 'ave_value': -3.0971548521840893, 'soft_opc': nan} step=1032




2022-04-20 16:47.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.45 [info     ] FQE_20220420164739: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016359742297682652, 'time_algorithm_update': 0.003506458082864451, 'loss': 0.02590862462881866, 'time_step': 0.003745294587556706, 'init_value': -3.8695740699768066, 'ave_value': -3.9758993949976054, 'soft_opc': nan} step=1376




2022-04-20 16:47.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.46 [info     ] FQE_20220420164739: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001642620840737986, 'time_algorithm_update': 0.0035232194634371027, 'loss': 0.03222469100187164, 'time_step': 0.0037608118944389875, 'init_value': -4.901235580444336, 'ave_value': -5.082942534352208, 'soft_opc': nan} step=1720




2022-04-20 16:47.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.48 [info     ] FQE_20220420164739: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001646093157834785, 'time_algorithm_update': 0.0034627193628355515, 'loss': 0.037540095882641886, 'time_step': 0.0037026017211204353, 'init_value': -5.574355125427246, 'ave_value': -5.862311675151189, 'soft_opc': nan} step=2064




2022-04-20 16:47.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.49 [info     ] FQE_20220420164739: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016451367112093194, 'time_algorithm_update': 0.0034465422463971513, 'loss': 0.045600370596011366, 'time_step': 0.0036847362684649092, 'init_value': -6.455783367156982, 'ave_value': -6.89400660376828, 'soft_opc': nan} step=2408




2022-04-20 16:47.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.51 [info     ] FQE_20220420164739: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016414911247963128, 'time_algorithm_update': 0.003516994936521663, 'loss': 0.05299730536657884, 'time_step': 0.003756370655325956, 'init_value': -7.028618812561035, 'ave_value': -7.593749191014616, 'soft_opc': nan} step=2752




2022-04-20 16:47.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.52 [info     ] FQE_20220420164739: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.000162624342497005, 'time_algorithm_update': 0.003430355426877044, 'loss': 0.06032294389077051, 'time_step': 0.0036670600259026817, 'init_value': -7.528118133544922, 'ave_value': -8.229862542893436, 'soft_opc': nan} step=3096




2022-04-20 16:47.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.54 [info     ] FQE_20220420164739: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001673767733019452, 'time_algorithm_update': 0.0045414823432301365, 'loss': 0.07102497675196202, 'time_step': 0.004787332096765208, 'init_value': -8.355827331542969, 'ave_value': -9.2516462004668, 'soft_opc': nan} step=3440




2022-04-20 16:47.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.55 [info     ] FQE_20220420164739: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001670274623604708, 'time_algorithm_update': 0.004084923239641412, 'loss': 0.07886031309554223, 'time_step': 0.004328364549681198, 'init_value': -8.9501953125, 'ave_value': -10.02498238983455, 'soft_opc': nan} step=3784




2022-04-20 16:47.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.57 [info     ] FQE_20220420164739: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016881837401279184, 'time_algorithm_update': 0.004363556933957477, 'loss': 0.09151431314950419, 'time_step': 0.0046088328195172685, 'init_value': -9.549196243286133, 'ave_value': -10.834267614661035, 'soft_opc': nan} step=4128




2022-04-20 16:47.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:47.59 [info     ] FQE_20220420164739: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016552348469578944, 'time_algorithm_update': 0.004392785388369893, 'loss': 0.09766659553184412, 'time_step': 0.0046332714169524435, 'init_value': -10.0969877243042, 'ave_value': -11.455054787150374, 'soft_opc': nan} step=4472




2022-04-20 16:47.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.00 [info     ] FQE_20220420164739: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001649634782658067, 'time_algorithm_update': 0.0040546045746914176, 'loss': 0.10855941845071611, 'time_step': 0.00429514397022336, 'init_value': -10.522041320800781, 'ave_value': -12.137700110996091, 'soft_opc': nan} step=4816




2022-04-20 16:48.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.02 [info     ] FQE_20220420164739: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016937560813371524, 'time_algorithm_update': 0.0045554173547168115, 'loss': 0.11836252278150168, 'time_step': 0.004801345425982808, 'init_value': -11.277259826660156, 'ave_value': -13.007911293114628, 'soft_opc': nan} step=5160




2022-04-20 16:48.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.04 [info     ] FQE_20220420164739: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016598230184510697, 'time_algorithm_update': 0.004113794066185175, 'loss': 0.13424413113010136, 'time_step': 0.004354873368906421, 'init_value': -11.991361618041992, 'ave_value': -13.813612024580037, 'soft_opc': nan} step=5504




2022-04-20 16:48.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.06 [info     ] FQE_20220420164739: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016999175382214924, 'time_algorithm_update': 0.004533297793809758, 'loss': 0.14709756868826443, 'time_step': 0.00477626642515493, 'init_value': -12.405829429626465, 'ave_value': -14.370803527413187, 'soft_opc': nan} step=5848




2022-04-20 16:48.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.07 [info     ] FQE_20220420164739: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016929729040278944, 'time_algorithm_update': 0.004187522239463274, 'loss': 0.1567914769280875, 'time_step': 0.004432527824889782, 'init_value': -13.064730644226074, 'ave_value': -15.019812728719668, 'soft_opc': nan} step=6192




2022-04-20 16:48.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.09 [info     ] FQE_20220420164739: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016888144404389138, 'time_algorithm_update': 0.004245272902555244, 'loss': 0.17080026813024698, 'time_step': 0.0044908842375112135, 'init_value': -13.646766662597656, 'ave_value': -15.722575732743417, 'soft_opc': nan} step=6536




2022-04-20 16:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.11 [info     ] FQE_20220420164739: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001691600611043531, 'time_algorithm_update': 0.004529873992121497, 'loss': 0.1858649175332556, 'time_step': 0.004773128864377044, 'init_value': -14.140698432922363, 'ave_value': -16.296349414458145, 'soft_opc': nan} step=6880




2022-04-20 16:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.12 [info     ] FQE_20220420164739: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016566695168960925, 'time_algorithm_update': 0.00413547768149265, 'loss': 0.2007688579994232, 'time_step': 0.004375004491140676, 'init_value': -15.033075332641602, 'ave_value': -17.165442989216196, 'soft_opc': nan} step=7224




2022-04-20 16:48.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.14 [info     ] FQE_20220420164739: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016630804815957712, 'time_algorithm_update': 0.004531527674475381, 'loss': 0.22310531602327732, 'time_step': 0.004769449317178061, 'init_value': -15.272377967834473, 'ave_value': -17.5782359248883, 'soft_opc': nan} step=7568




2022-04-20 16:48.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.16 [info     ] FQE_20220420164739: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016892302867978117, 'time_algorithm_update': 0.004086909599082414, 'loss': 0.2374589453440506, 'time_step': 0.004332535488660945, 'init_value': -16.11710548400879, 'ave_value': -18.371486168831318, 'soft_opc': nan} step=7912




2022-04-20 16:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.17 [info     ] FQE_20220420164739: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016110165174617323, 'time_algorithm_update': 0.004352369973825854, 'loss': 0.2583962576355525, 'time_step': 0.004583658867104109, 'init_value': -16.384174346923828, 'ave_value': -18.747318086377135, 'soft_opc': nan} step=8256




2022-04-20 16:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.19 [info     ] FQE_20220420164739: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001499694447184718, 'time_algorithm_update': 0.00385765835296276, 'loss': 0.27595430857306996, 'time_step': 0.004075692836628403, 'init_value': -17.111412048339844, 'ave_value': -19.5163271871236, 'soft_opc': nan} step=8600




2022-04-20 16:48.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.20 [info     ] FQE_20220420164739: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00014737594959347746, 'time_algorithm_update': 0.0037845289984414746, 'loss': 0.29713851535086366, 'time_step': 0.003998359968495923, 'init_value': -17.686588287353516, 'ave_value': -20.03146231075128, 'soft_opc': nan} step=8944




2022-04-20 16:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.22 [info     ] FQE_20220420164739: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00014460294745689215, 'time_algorithm_update': 0.003957449696784796, 'loss': 0.3180796540353101, 'time_step': 0.004165354856224947, 'init_value': -18.596494674682617, 'ave_value': -20.93898091923009, 'soft_opc': nan} step=9288




2022-04-20 16:48.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.23 [info     ] FQE_20220420164739: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001505564811617829, 'time_algorithm_update': 0.003811534060988315, 'loss': 0.34381269883407756, 'time_step': 0.004032215406728345, 'init_value': -19.416683197021484, 'ave_value': -21.70975554196147, 'soft_opc': nan} step=9632




2022-04-20 16:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.25 [info     ] FQE_20220420164739: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016604190648988236, 'time_algorithm_update': 0.00453063776326734, 'loss': 0.35649237624196295, 'time_step': 0.0047717960767967756, 'init_value': -19.71546173095703, 'ave_value': -21.925824328881127, 'soft_opc': nan} step=9976




2022-04-20 16:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.27 [info     ] FQE_20220420164739: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016511664834133413, 'time_algorithm_update': 0.0041611658972363135, 'loss': 0.38428022702722703, 'time_step': 0.004402125990668008, 'init_value': -20.348081588745117, 'ave_value': -22.342973512142628, 'soft_opc': nan} step=10320




2022-04-20 16:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.29 [info     ] FQE_20220420164739: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001652975415074548, 'time_algorithm_update': 0.004414261773575184, 'loss': 0.4059644731378919, 'time_step': 0.0046547602775485015, 'init_value': -20.720226287841797, 'ave_value': -22.709002376112853, 'soft_opc': nan} step=10664




2022-04-20 16:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.30 [info     ] FQE_20220420164739: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016772539116615472, 'time_algorithm_update': 0.00437288714009662, 'loss': 0.4245208356323717, 'time_step': 0.004618121441020522, 'init_value': -21.486351013183594, 'ave_value': -23.440588868738296, 'soft_opc': nan} step=11008




2022-04-20 16:48.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.32 [info     ] FQE_20220420164739: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016767271729402764, 'time_algorithm_update': 0.004016537998997888, 'loss': 0.43711909031235546, 'time_step': 0.004258185625076294, 'init_value': -21.55417823791504, 'ave_value': -23.510530128752862, 'soft_opc': nan} step=11352




2022-04-20 16:48.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.34 [info     ] FQE_20220420164739: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001742382382237634, 'time_algorithm_update': 0.0045259858286658, 'loss': 0.45608509292956007, 'time_step': 0.004776196424351182, 'init_value': -22.001220703125, 'ave_value': -23.784141911606532, 'soft_opc': nan} step=11696




2022-04-20 16:48.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.35 [info     ] FQE_20220420164739: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016808717749839606, 'time_algorithm_update': 0.004125795392102973, 'loss': 0.4752973825833219, 'time_step': 0.004368162432382273, 'init_value': -22.65296173095703, 'ave_value': -24.397346656628557, 'soft_opc': nan} step=12040




2022-04-20 16:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.37 [info     ] FQE_20220420164739: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017086503117583519, 'time_algorithm_update': 0.004490290270295254, 'loss': 0.4935861175316711, 'time_step': 0.004740533440612083, 'init_value': -22.815454483032227, 'ave_value': -24.837135184294468, 'soft_opc': nan} step=12384




2022-04-20 16:48.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.39 [info     ] FQE_20220420164739: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016704617544662122, 'time_algorithm_update': 0.004106749628865441, 'loss': 0.5151147586131078, 'time_step': 0.004348018834757251, 'init_value': -23.679922103881836, 'ave_value': -25.35653305531622, 'soft_opc': nan} step=12728




2022-04-20 16:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.40 [info     ] FQE_20220420164739: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016917253649512002, 'time_algorithm_update': 0.004351183425548465, 'loss': 0.5383646800561787, 'time_step': 0.004596740007400513, 'init_value': -23.812477111816406, 'ave_value': -25.409823758806194, 'soft_opc': nan} step=13072




2022-04-20 16:48.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.42 [info     ] FQE_20220420164739: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016960363055384435, 'time_algorithm_update': 0.004455881756405497, 'loss': 0.5524065708524959, 'time_step': 0.004701825075371321, 'init_value': -24.574209213256836, 'ave_value': -26.146044244798453, 'soft_opc': nan} step=13416




2022-04-20 16:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.44 [info     ] FQE_20220420164739: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001704581948213799, 'time_algorithm_update': 0.004115262003832085, 'loss': 0.5668272520577925, 'time_step': 0.004363044749858768, 'init_value': -25.20781707763672, 'ave_value': -26.641292909646893, 'soft_opc': nan} step=13760




2022-04-20 16:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.45 [info     ] FQE_20220420164739: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017113325207732444, 'time_algorithm_update': 0.004571907742078914, 'loss': 0.5805707674928356, 'time_step': 0.004817334025405174, 'init_value': -25.054580688476562, 'ave_value': -26.780610954707807, 'soft_opc': nan} step=14104




2022-04-20 16:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.47 [info     ] FQE_20220420164739: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001659511133681896, 'time_algorithm_update': 0.004072729931321255, 'loss': 0.5855462259483025, 'time_step': 0.0043174222458240595, 'init_value': -25.666967391967773, 'ave_value': -27.502886327805818, 'soft_opc': nan} step=14448




2022-04-20 16:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.49 [info     ] FQE_20220420164739: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016955719437710074, 'time_algorithm_update': 0.004551978998405989, 'loss': 0.6071195141683051, 'time_step': 0.004799933627594349, 'init_value': -25.61346435546875, 'ave_value': -27.66943572440126, 'soft_opc': nan} step=14792




2022-04-20 16:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.50 [info     ] FQE_20220420164739: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017005690308504327, 'time_algorithm_update': 0.004134244697038518, 'loss': 0.6114822873022667, 'time_step': 0.004380296136057654, 'init_value': -26.365520477294922, 'ave_value': -28.37972695652966, 'soft_opc': nan} step=15136




2022-04-20 16:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.52 [info     ] FQE_20220420164739: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016883292863535327, 'time_algorithm_update': 0.0042061618594236155, 'loss': 0.6355037923339147, 'time_step': 0.004452767067177351, 'init_value': -26.646387100219727, 'ave_value': -28.59902332819797, 'soft_opc': nan} step=15480




2022-04-20 16:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.54 [info     ] FQE_20220420164739: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016992521840472554, 'time_algorithm_update': 0.004555107549179432, 'loss': 0.6527573562682022, 'time_step': 0.004800368187039397, 'init_value': -27.315021514892578, 'ave_value': -29.39275615943579, 'soft_opc': nan} step=15824




2022-04-20 16:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.56 [info     ] FQE_20220420164739: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016763182573540267, 'time_algorithm_update': 0.0041280437347500825, 'loss': 0.676449331205858, 'time_step': 0.004369285217551298, 'init_value': -27.615089416503906, 'ave_value': -29.79876390846933, 'soft_opc': nan} step=16168




2022-04-20 16:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.57 [info     ] FQE_20220420164739: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001722269280012264, 'time_algorithm_update': 0.004552630491034929, 'loss': 0.6858906569554969, 'time_step': 0.004799365304237188, 'init_value': -27.601131439208984, 'ave_value': -30.21672331698843, 'soft_opc': nan} step=16512




2022-04-20 16:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:48.59 [info     ] FQE_20220420164739: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016433624334113542, 'time_algorithm_update': 0.004072343887284745, 'loss': 0.7028835147868322, 'time_step': 0.004312343375627385, 'init_value': -28.563175201416016, 'ave_value': -31.20960571204757, 'soft_opc': nan} step=16856




2022-04-20 16:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 16:49.01 [info     ] FQE_20220420164739: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016979838526526162, 'time_algorithm_update': 0.004499876221945119, 'loss': 0.7224655100198593, 'time_step': 0.0047459207301916075, 'init_value': -29.254554748535156, 'ave_value': -32.11382874126907, 'soft_opc': nan} step=17200




2022-04-20 16:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420164739/model_17200.pt
search iteration:  9
using hyper params:  [0.0013388479845037537, 0.003637010775448851, 4.3273546726487756e-05, 3]
2022-04-20 16:49.01 [debug    ] RoundIterator is selected.
2022-04-20 16:49.01 [info     ] Directory is created at d3rlpy_logs/CQL_20220420164901
2022-04-20 16:49.01 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:49.01 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:49.01 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420164901/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0013388479845037537, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.10 [info     ] CQL_20220420164901: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003383382719162612, 'time_algorithm_update': 0.025630886094611987, 'temp_loss': 4.703487645812899, 'temp': 0.9927312229809008, 'alpha_loss': -15.240129152933756, 'alpha': 1.0165983089926647, 'critic_loss': 29.643391664962323, 'actor_loss': 1.2830962748457377, 'time_step': 0.02606719488288924, 'td_error': 4.509057128846437, 'init_value': -6.545088291168213, 'ave_value': -3.598045688396202} step=342
2022-04-20 16:49.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.19 [info     ] CQL_20220420164901: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003160879625911601, 'time_algorithm_update': 0.024533253664161727, 'temp_loss': 4.174210526789838, 'temp': 0.9787307709281208, 'alpha_loss': -8.121280085970783, 'alpha': 1.0427911539523922, 'critic_loss': 18.71485433801573, 'actor_loss': 4.875069374926606, 'time_step': 0.024939786621004517, 'td_error': 3.6524888857821476, 'init_value': -10.85206413269043, 'ave_value': -5.686047461455738} step=684
2022-04-20 16:49.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.28 [info     ] CQL_20220420164901: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003445476119281256, 'time_algorithm_update': 0.02598170997106541, 'temp_loss': 3.439452141349079, 'temp': 0.9662653761997557, 'alpha_loss': -4.712995368834824, 'alpha': 1.0622559482591194, 'critic_loss': 27.029167688380905, 'actor_loss': 8.329660529978792, 'time_step': 0.026427248068023147, 'td_error': 4.803013768173872, 'init_value': -16.316625595092773, 'ave_value': -8.355329213499486} step=1026
2022-04-20 16:49.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.38 [info     ] CQL_20220420164901: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003376258047003495, 'time_algorithm_update': 0.025690538144251058, 'temp_loss': 2.9449048739427712, 'temp': 0.954873287886904, 'alpha_loss': -1.9067784437163575, 'alpha': 1.0752307971318562, 'critic_loss': 42.63985508924339, 'actor_loss': 12.01319232182196, 'time_step': 0.026129452108639723, 'td_error': 6.936380542199836, 'init_value': -21.94198226928711, 'ave_value': -11.086887023657024} step=1368
2022-04-20 16:49.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.47 [info     ] CQL_20220420164901: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003373783234267207, 'time_algorithm_update': 0.025787200146948384, 'temp_loss': 2.5369752952229905, 'temp': 0.9442064394379219, 'alpha_loss': 0.5122333055117702, 'alpha': 1.078524628926439, 'critic_loss': 62.02694471258866, 'actor_loss': 15.568425047467327, 'time_step': 0.02622239561805948, 'td_error': 9.08172916401288, 'init_value': -27.424585342407227, 'ave_value': -13.514701692872757} step=1710
2022-04-20 16:49.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:49.57 [info     ] CQL_20220420164901: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00034122299729732043, 'time_algorithm_update': 0.02574614683787028, 'temp_loss': 2.184555010837421, 'temp': 0.9341942427451151, 'alpha_loss': 2.5727908830022255, 'alpha': 1.068523291258784, 'critic_loss': 83.39565625107079, 'actor_loss': 19.11629579220599, 'time_step': 0.026185936397976346, 'td_error': 12.275103683020534, 'init_value': -34.336830139160156, 'ave_value': -17.323006124226627} step=2052
2022-04-20 16:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.06 [info     ] CQL_20220420164901: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003419724124216894, 'time_algorithm_update': 0.025965570706373068, 'temp_loss': 1.903694712976266, 'temp': 0.924650869348593, 'alpha_loss': 4.226332090228622, 'alpha': 1.044759300020006, 'critic_loss': 107.59506140814887, 'actor_loss': 22.57962175558882, 'time_step': 0.026406970637583592, 'td_error': 14.817204631975587, 'init_value': -38.975242614746094, 'ave_value': -18.985612969233376} step=2394
2022-04-20 16:50.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.15 [info     ] CQL_20220420164901: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034519594315199825, 'time_algorithm_update': 0.025517004972312882, 'temp_loss': 1.6343124977677885, 'temp': 0.9156104322762517, 'alpha_loss': 5.645106003298397, 'alpha': 1.0097156494681598, 'critic_loss': 131.71285361574408, 'actor_loss': 25.97817790159705, 'time_step': 0.02596069846236915, 'td_error': 17.42581975832231, 'init_value': -43.71650314331055, 'ave_value': -22.028702505703446} step=2736
2022-04-20 16:50.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.25 [info     ] CQL_20220420164901: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034285009953013637, 'time_algorithm_update': 0.025901509307281315, 'temp_loss': 1.4257098547896447, 'temp': 0.9070249705286751, 'alpha_loss': 6.80020641025744, 'alpha': 0.9694665076091276, 'critic_loss': 158.24084173726757, 'actor_loss': 29.39946610746328, 'time_step': 0.02634419195833262, 'td_error': 20.15896433567608, 'init_value': -49.82370376586914, 'ave_value': -24.46932801869673} step=3078
2022-04-20 16:50.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.34 [info     ] CQL_20220420164901: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003339484421133298, 'time_algorithm_update': 0.025811988707871464, 'temp_loss': 1.2190468813243664, 'temp': 0.8987299026104442, 'alpha_loss': 7.5446809049238235, 'alpha': 0.9292661730657544, 'critic_loss': 188.5203629432366, 'actor_loss': 32.78391811303925, 'time_step': 0.026244434696889064, 'td_error': 23.237597061955984, 'init_value': -54.739479064941406, 'ave_value': -27.893276856855767} step=3420
2022-04-20 16:50.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.43 [info     ] CQL_20220420164901: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00034079774778488783, 'time_algorithm_update': 0.024436190811514158, 'temp_loss': 1.0244727408328251, 'temp': 0.8910704690810533, 'alpha_loss': 8.047811154036493, 'alpha': 0.8910314759664368, 'critic_loss': 216.79239061143664, 'actor_loss': 35.920466986315986, 'time_step': 0.024877997866848058, 'td_error': 27.40221976613034, 'init_value': -62.41644287109375, 'ave_value': -31.205815692705077} step=3762
2022-04-20 16:50.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:50.52 [info     ] CQL_20220420164901: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000335662685639677, 'time_algorithm_update': 0.02437427517963432, 'temp_loss': 0.8739047012351758, 'temp': 0.8836727029050303, 'alpha_loss': 8.32858868091427, 'alpha': 0.8560175568039654, 'critic_loss': 244.92544466431377, 'actor_loss': 39.11495619768288, 'time_step': 0.024806928913495695, 'td_error': 26.933950430022083, 'init_value': -65.53632354736328, 'ave_value': -32.75297650493883} step=4104
2022-04-20 16:50.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.01 [info     ] CQL_20220420164901: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00033837800834611147, 'time_algorithm_update': 0.024127585148950765, 'temp_loss': 0.7037714529804319, 'temp': 0.8769404403647484, 'alpha_loss': 8.657887747413234, 'alpha': 0.8229483486267558, 'critic_loss': 271.5517522800736, 'actor_loss': 41.96685138501619, 'time_step': 0.024563587199874788, 'td_error': 32.43512019136005, 'init_value': -71.03421783447266, 'ave_value': -35.56239059188613} step=4446
2022-04-20 16:51.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.10 [info     ] CQL_20220420164901: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003354765518366942, 'time_algorithm_update': 0.024319214430468823, 'temp_loss': 0.565839876753022, 'temp': 0.8708352896205166, 'alpha_loss': 8.919772019860341, 'alpha': 0.7919656888783326, 'critic_loss': 297.53714836968317, 'actor_loss': 44.78498012698882, 'time_step': 0.024753388605619733, 'td_error': 37.05890527501701, 'init_value': -78.0792007446289, 'ave_value': -38.55180200320106} step=4788
2022-04-20 16:51.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.18 [info     ] CQL_20220420164901: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003421334495321352, 'time_algorithm_update': 0.02426621927852519, 'temp_loss': 0.42960368332109955, 'temp': 0.8653929271893195, 'alpha_loss': 8.988355826216134, 'alpha': 0.7629511779860446, 'critic_loss': 324.38233894214295, 'actor_loss': 47.49434662980643, 'time_step': 0.02470601580993474, 'td_error': 39.087955195280244, 'init_value': -82.83747863769531, 'ave_value': -40.49884623855889} step=5130
2022-04-20 16:51.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.27 [info     ] CQL_20220420164901: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003328002684297617, 'time_algorithm_update': 0.023787198708071347, 'temp_loss': 0.34836847506659596, 'temp': 0.8604050756197924, 'alpha_loss': 9.003527217441135, 'alpha': 0.7357167178078702, 'critic_loss': 349.0373116654959, 'actor_loss': 50.06183714615671, 'time_step': 0.02421813680414568, 'td_error': 40.49546220016749, 'init_value': -87.8795166015625, 'ave_value': -44.64988847991905} step=5472
2022-04-20 16:51.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.36 [info     ] CQL_20220420164901: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00033717894414712115, 'time_algorithm_update': 0.02433280359234726, 'temp_loss': 0.22381656366939606, 'temp': 0.8564835894874662, 'alpha_loss': 9.016290454139487, 'alpha': 0.7098579436366321, 'critic_loss': 373.31131713153326, 'actor_loss': 52.529582787675466, 'time_step': 0.024771187040540908, 'td_error': 45.61045768072478, 'init_value': -92.27008819580078, 'ave_value': -45.28595502438846} step=5814
2022-04-20 16:51.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.45 [info     ] CQL_20220420164901: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003341624611302426, 'time_algorithm_update': 0.024265926483778927, 'temp_loss': 0.1998546335475835, 'temp': 0.8529736195739946, 'alpha_loss': 8.954740687420493, 'alpha': 0.6853785568859145, 'critic_loss': 397.5060265970509, 'actor_loss': 54.97066222296821, 'time_step': 0.024697989051104985, 'td_error': 48.12575367824074, 'init_value': -97.8981704711914, 'ave_value': -47.6436331655904} step=6156
2022-04-20 16:51.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:51.54 [info     ] CQL_20220420164901: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003344845353511342, 'time_algorithm_update': 0.02429662531579447, 'temp_loss': 0.10598232419017636, 'temp': 0.8499202393648917, 'alpha_loss': 8.942285509834512, 'alpha': 0.66215629744948, 'critic_loss': 420.9845954047309, 'actor_loss': 57.28155360305519, 'time_step': 0.02472934179138719, 'td_error': 51.53665530111977, 'init_value': -101.32310485839844, 'ave_value': -50.17257856513466} step=6498
2022-04-20 16:51.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.03 [info     ] CQL_20220420164901: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003349711323342128, 'time_algorithm_update': 0.024265557701824703, 'temp_loss': 0.027083072604404554, 'temp': 0.8484742268484239, 'alpha_loss': 8.854692994502553, 'alpha': 0.6398421827464076, 'critic_loss': 442.92468234948944, 'actor_loss': 59.447055783188134, 'time_step': 0.024698873012386567, 'td_error': 50.47168333511575, 'init_value': -105.07210540771484, 'ave_value': -50.798219599766774} step=6840
2022-04-20 16:52.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.11 [info     ] CQL_20220420164901: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00033516772309241937, 'time_algorithm_update': 0.024325010372184173, 'temp_loss': -0.007137139970491155, 'temp': 0.8483026588869373, 'alpha_loss': 8.666073464510733, 'alpha': 0.6185945824921479, 'critic_loss': 463.5473964758087, 'actor_loss': 61.42285911381593, 'time_step': 0.024760597630550985, 'td_error': 51.70983062522473, 'init_value': -107.08259582519531, 'ave_value': -52.82454391281347} step=7182
2022-04-20 16:52.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.20 [info     ] CQL_20220420164901: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003371754584953799, 'time_algorithm_update': 0.024401582472505626, 'temp_loss': -0.08850143972392145, 'temp': 0.8496196404186606, 'alpha_loss': 8.630203612366614, 'alpha': 0.5983051689047563, 'critic_loss': 480.22839685629685, 'actor_loss': 63.18992094965706, 'time_step': 0.02483829350499382, 'td_error': 57.872267546605094, 'init_value': -114.63902282714844, 'ave_value': -54.774422762961805} step=7524
2022-04-20 16:52.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.29 [info     ] CQL_20220420164901: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003417249311480606, 'time_algorithm_update': 0.02435492772107933, 'temp_loss': -0.1108097167391526, 'temp': 0.8526139959954379, 'alpha_loss': 8.395932719024302, 'alpha': 0.5788702940383152, 'critic_loss': 497.665807980543, 'actor_loss': 64.98323643556115, 'time_step': 0.02479757621274357, 'td_error': 54.83934610536079, 'init_value': -116.40775299072266, 'ave_value': -56.563958444874565} step=7866
2022-04-20 16:52.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.38 [info     ] CQL_20220420164901: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003357637695401733, 'time_algorithm_update': 0.024231979721470884, 'temp_loss': -0.13947291592713337, 'temp': 0.8569403472002487, 'alpha_loss': 8.275639419667204, 'alpha': 0.5600341060008222, 'critic_loss': 514.0963061594823, 'actor_loss': 66.58862200954505, 'time_step': 0.02466268567313925, 'td_error': 54.19469816696905, 'init_value': -118.11262512207031, 'ave_value': -56.73019022285804} step=8208
2022-04-20 16:52.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.47 [info     ] CQL_20220420164901: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003437459120276379, 'time_algorithm_update': 0.024331304064968175, 'temp_loss': -0.16543318183101408, 'temp': 0.8623350323640813, 'alpha_loss': 8.036146937755117, 'alpha': 0.542239017479601, 'critic_loss': 529.1165717052437, 'actor_loss': 68.00036795097485, 'time_step': 0.024775084696317975, 'td_error': 57.40939111070615, 'init_value': -121.78187561035156, 'ave_value': -58.40782335461126} step=8550
2022-04-20 16:52.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:52.56 [info     ] CQL_20220420164901: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003354117187143069, 'time_algorithm_update': 0.024425509380318268, 'temp_loss': -0.15872494458106526, 'temp': 0.8685814060314357, 'alpha_loss': 7.804784840310526, 'alpha': 0.5250890425771301, 'critic_loss': 543.2706611142521, 'actor_loss': 69.44209162394206, 'time_step': 0.024859370543942813, 'td_error': 62.84893256175697, 'init_value': -125.2539291381836, 'ave_value': -60.272251371373464} step=8892
2022-04-20 16:52.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.05 [info     ] CQL_20220420164901: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00034426945691917374, 'time_algorithm_update': 0.024506146447700366, 'temp_loss': -0.12370861278117051, 'temp': 0.8733392563479686, 'alpha_loss': 7.557869225217585, 'alpha': 0.5087560863174193, 'critic_loss': 559.508256075675, 'actor_loss': 70.88086706016496, 'time_step': 0.02494761818333676, 'td_error': 59.68070131613124, 'init_value': -129.08583068847656, 'ave_value': -62.22429871158691} step=9234
2022-04-20 16:53.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.14 [info     ] CQL_20220420164901: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034087582638389185, 'time_algorithm_update': 0.025577335329780803, 'temp_loss': -0.11943296412801185, 'temp': 0.8786840339501699, 'alpha_loss': 7.380207389418842, 'alpha': 0.49289561642540825, 'critic_loss': 571.3853129782872, 'actor_loss': 72.06893472504197, 'time_step': 0.026020240365413196, 'td_error': 60.33737712907731, 'init_value': -131.67662048339844, 'ave_value': -63.321219456939545} step=9576
2022-04-20 16:53.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.24 [info     ] CQL_20220420164901: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034168170906646904, 'time_algorithm_update': 0.02594908845354939, 'temp_loss': -0.12142124331463673, 'temp': 0.8837389567790673, 'alpha_loss': 7.153502248184026, 'alpha': 0.4775741104668344, 'critic_loss': 583.2396722983199, 'actor_loss': 73.24775886535645, 'time_step': 0.02638854129969725, 'td_error': 60.79777288421456, 'init_value': -132.82339477539062, 'ave_value': -63.48635778122523} step=9918
2022-04-20 16:53.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.33 [info     ] CQL_20220420164901: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00034450717836792706, 'time_algorithm_update': 0.025720642324079546, 'temp_loss': -0.10469343807352216, 'temp': 0.8877751149280727, 'alpha_loss': 6.921380975093061, 'alpha': 0.4630152100359487, 'critic_loss': 594.87516320658, 'actor_loss': 74.41066245586552, 'time_step': 0.02616725748742533, 'td_error': 64.99281513914558, 'init_value': -133.30551147460938, 'ave_value': -64.42379904216608} step=10260
2022-04-20 16:53.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.42 [info     ] CQL_20220420164901: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034036831549036573, 'time_algorithm_update': 0.025895477735508255, 'temp_loss': -0.10026831086180364, 'temp': 0.8921715116988845, 'alpha_loss': 6.790707975800275, 'alpha': 0.4487799025244183, 'critic_loss': 605.4200996265076, 'actor_loss': 75.4150781129536, 'time_step': 0.026337475804557576, 'td_error': 67.48848456652227, 'init_value': -134.37881469726562, 'ave_value': -64.77980982411418} step=10602
2022-04-20 16:53.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:53.52 [info     ] CQL_20220420164901: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00034687951294302245, 'time_algorithm_update': 0.025801539421081543, 'temp_loss': -0.0665115909487532, 'temp': 0.8956946211251599, 'alpha_loss': 6.529874359655102, 'alpha': 0.43492307291741955, 'critic_loss': 616.9089154695209, 'actor_loss': 76.42924372355144, 'time_step': 0.026250721418369583, 'td_error': 62.24480853344211, 'init_value': -137.86117553710938, 'ave_value': -66.80908212313646} step=10944
2022-04-20 16:53.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.01 [info     ] CQL_20220420164901: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00033495649259689955, 'time_algorithm_update': 0.025697839190388285, 'temp_loss': -0.07736260558298805, 'temp': 0.8979914456780194, 'alpha_loss': 6.319186258037188, 'alpha': 0.42173131977954104, 'critic_loss': 626.9151163380049, 'actor_loss': 77.41855960422092, 'time_step': 0.026132076804400883, 'td_error': 64.29025730381977, 'init_value': -136.8411407470703, 'ave_value': -65.79961700767815} step=11286
2022-04-20 16:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.11 [info     ] CQL_20220420164901: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000345172937850506, 'time_algorithm_update': 0.025713169086746306, 'temp_loss': -0.05828206001492272, 'temp': 0.9017335369921567, 'alpha_loss': 6.130481820357473, 'alpha': 0.4088873198332145, 'critic_loss': 634.976968151784, 'actor_loss': 78.24621225100512, 'time_step': 0.026159835837737858, 'td_error': 68.83107085708859, 'init_value': -141.2298126220703, 'ave_value': -67.75827907084881} step=11628
2022-04-20 16:54.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.20 [info     ] CQL_20220420164901: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00033355735198796146, 'time_algorithm_update': 0.025625812379937423, 'temp_loss': -0.05764113003333584, 'temp': 0.9040321242391017, 'alpha_loss': 5.954513646705806, 'alpha': 0.39650665650590816, 'critic_loss': 644.3269740768343, 'actor_loss': 79.18994495324921, 'time_step': 0.02606025007035997, 'td_error': 69.07651137681871, 'init_value': -142.14833068847656, 'ave_value': -68.71377739411082} step=11970
2022-04-20 16:54.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.29 [info     ] CQL_20220420164901: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003362182985272324, 'time_algorithm_update': 0.0257616168574283, 'temp_loss': -0.025276055646657247, 'temp': 0.9059066085787545, 'alpha_loss': 5.770798677589461, 'alpha': 0.3844293199958857, 'critic_loss': 652.2559589586759, 'actor_loss': 80.06933154279028, 'time_step': 0.026197839898672717, 'td_error': 66.37763256278473, 'init_value': -144.0673065185547, 'ave_value': -69.98196648684709} step=12312
2022-04-20 16:54.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.39 [info     ] CQL_20220420164901: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00033886460532919007, 'time_algorithm_update': 0.025577034169470356, 'temp_loss': -0.03632477109935897, 'temp': 0.9070748613591779, 'alpha_loss': 5.628969479025456, 'alpha': 0.37272362402308057, 'critic_loss': 661.2691484417832, 'actor_loss': 80.88158954374971, 'time_step': 0.026013460075646117, 'td_error': 69.32274946763276, 'init_value': -146.63218688964844, 'ave_value': -71.1557555670553} step=12654
2022-04-20 16:54.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.48 [info     ] CQL_20220420164901: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003399423688475849, 'time_algorithm_update': 0.025650179176999813, 'temp_loss': 0.0055951428387248725, 'temp': 0.9079241953049487, 'alpha_loss': 5.420213720254731, 'alpha': 0.361397689894626, 'critic_loss': 670.0067973889803, 'actor_loss': 81.62852312946877, 'time_step': 0.026089749838176527, 'td_error': 67.59955072308598, 'init_value': -146.7305450439453, 'ave_value': -70.74565353139161} step=12996
2022-04-20 16:54.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:54.57 [info     ] CQL_20220420164901: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003389468667102836, 'time_algorithm_update': 0.025539916161208126, 'temp_loss': -0.045656353725414524, 'temp': 0.909171744396812, 'alpha_loss': 5.232064462544625, 'alpha': 0.35047000271883627, 'critic_loss': 675.5902638574789, 'actor_loss': 82.30880255448191, 'time_step': 0.025976470339367962, 'td_error': 67.14164377373814, 'init_value': -148.42514038085938, 'ave_value': -72.99765873082988} step=13338
2022-04-20 16:54.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.07 [info     ] CQL_20220420164901: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00033780148154810856, 'time_algorithm_update': 0.025601727920666076, 'temp_loss': -0.006727869143132229, 'temp': 0.9100606349128032, 'alpha_loss': 5.085985140493738, 'alpha': 0.3398752538432852, 'critic_loss': 682.4601872315881, 'actor_loss': 83.01191497267338, 'time_step': 0.026037304024947316, 'td_error': 65.03213521172752, 'init_value': -146.9569091796875, 'ave_value': -71.73026942606042} step=13680
2022-04-20 16:55.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.16 [info     ] CQL_20220420164901: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.000336679798817774, 'time_algorithm_update': 0.02557564618294699, 'temp_loss': -0.0052105133252882815, 'temp': 0.910654076010163, 'alpha_loss': 4.8562960073961845, 'alpha': 0.32969363992325745, 'critic_loss': 687.6066359135143, 'actor_loss': 83.61907811750446, 'time_step': 0.02601176186611778, 'td_error': 69.17401684492707, 'init_value': -149.07212829589844, 'ave_value': -72.43877456514014} step=14022
2022-04-20 16:55.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.25 [info     ] CQL_20220420164901: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003338410840396993, 'time_algorithm_update': 0.025830307202032436, 'temp_loss': 0.031045312987782105, 'temp': 0.9102053412219935, 'alpha_loss': 4.7101935461947795, 'alpha': 0.31980922042626386, 'critic_loss': 693.8682151035956, 'actor_loss': 84.17760529992177, 'time_step': 0.026263860234043056, 'td_error': 68.4353054045095, 'init_value': -151.46591186523438, 'ave_value': -74.16996416283352} step=14364
2022-04-20 16:55.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.35 [info     ] CQL_20220420164901: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000342403238976908, 'time_algorithm_update': 0.025342301318519993, 'temp_loss': 0.06097124698871409, 'temp': 0.9079299377419098, 'alpha_loss': 4.556101224575824, 'alpha': 0.31019274158435955, 'critic_loss': 697.4198110480057, 'actor_loss': 84.78124569452297, 'time_step': 0.025784012169865835, 'td_error': 68.5778109370759, 'init_value': -152.75967407226562, 'ave_value': -74.7851327917396} step=14706
2022-04-20 16:55.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.44 [info     ] CQL_20220420164901: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003413268697192097, 'time_algorithm_update': 0.025793498719644824, 'temp_loss': 0.04065393478941848, 'temp': 0.9056708148696966, 'alpha_loss': 4.411553902235645, 'alpha': 0.3009281858191853, 'critic_loss': 701.4711926555076, 'actor_loss': 85.2228554842765, 'time_step': 0.026233797184905115, 'td_error': 67.4442299270698, 'init_value': -151.5232696533203, 'ave_value': -74.2130092769982} step=15048
2022-04-20 16:55.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:55.53 [info     ] CQL_20220420164901: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.000332708944354141, 'time_algorithm_update': 0.025514684922513905, 'temp_loss': 0.06970658228999516, 'temp': 0.9031327797306908, 'alpha_loss': 4.262597294578775, 'alpha': 0.2918731184207905, 'critic_loss': 706.5913334004363, 'actor_loss': 85.7622023911504, 'time_step': 0.025946708450540465, 'td_error': 68.07083530749573, 'init_value': -156.40150451660156, 'ave_value': -76.50783910232622} step=15390
2022-04-20 16:55.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.03 [info     ] CQL_20220420164901: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00034486132058483815, 'time_algorithm_update': 0.025769912011442127, 'temp_loss': 0.07487631744939333, 'temp': 0.9001791946023529, 'alpha_loss': 4.113044844036214, 'alpha': 0.28314773110966934, 'critic_loss': 710.4350071957236, 'actor_loss': 86.18433266355281, 'time_step': 0.026214956540113302, 'td_error': 66.82371187688712, 'init_value': -154.590087890625, 'ave_value': -75.83798517216836} step=15732
2022-04-20 16:56.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.12 [info     ] CQL_20220420164901: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00033628870869240567, 'time_algorithm_update': 0.02455045256698341, 'temp_loss': 0.08503257104179315, 'temp': 0.8961915239604593, 'alpha_loss': 3.9410772902226587, 'alpha': 0.27470744484000736, 'critic_loss': 714.3602398431789, 'actor_loss': 86.57266888980978, 'time_step': 0.024986974677147222, 'td_error': 65.8862480502555, 'init_value': -152.86685180664062, 'ave_value': -75.86429141383853} step=16074
2022-04-20 16:56.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.20 [info     ] CQL_20220420164901: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00033519839682774236, 'time_algorithm_update': 0.024197724130418565, 'temp_loss': 0.06540408965243756, 'temp': 0.8930153644572921, 'alpha_loss': 3.8180406776785154, 'alpha': 0.26652185435880693, 'critic_loss': 716.8384547540319, 'actor_loss': 86.86349518536127, 'time_step': 0.02463259473878738, 'td_error': 67.11858355819541, 'init_value': -153.52066040039062, 'ave_value': -76.16583283243148} step=16416
2022-04-20 16:56.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.29 [info     ] CQL_20220420164901: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033778196189835757, 'time_algorithm_update': 0.02417393664867557, 'temp_loss': 0.09419596215372371, 'temp': 0.8892978581420162, 'alpha_loss': 3.6498304947077878, 'alpha': 0.2585676138164007, 'critic_loss': 719.019242136102, 'actor_loss': 87.32709326939276, 'time_step': 0.024609791605096114, 'td_error': 66.72141670074018, 'init_value': -154.7367401123047, 'ave_value': -77.38501988660913} step=16758
2022-04-20 16:56.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:56.38 [info     ] CQL_20220420164901: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003303414896914833, 'time_algorithm_update': 0.024164486349674694, 'temp_loss': 0.06732761613719644, 'temp': 0.8860435803034152, 'alpha_loss': 3.5445807886402507, 'alpha': 0.2509061151254944, 'critic_loss': 719.9496540270353, 'actor_loss': 87.45624754163954, 'time_step': 0.024588365303842646, 'td_error': 67.78121367162127, 'init_value': -154.6940155029297, 'ave_value': -76.21675014069622} step=17100
2022-04-20 16:56.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420164901/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:56.39 [info     ] FQE_20220420165638: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015547189367822855, 'time_algorithm_update': 0.003509077681116311, 'loss': 0.00803893960097872, 'time_step': 0.0037371652672089726, 'init_value': 0.024366402998566628, 'ave_value': 0.07213985118301804, 'soft_opc': nan} step=166




2022-04-20 16:56.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.40 [info     ] FQE_20220420165638: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015348411468138178, 'time_algorithm_update': 0.003382804882095521, 'loss': 0.0060016734214185415, 'time_step': 0.003607781536607857, 'init_value': -0.126821830868721, 'ave_value': -0.02829925707281307, 'soft_opc': nan} step=332




2022-04-20 16:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.40 [info     ] FQE_20220420165638: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015676021575927734, 'time_algorithm_update': 0.0034417172512376166, 'loss': 0.005281509566457426, 'time_step': 0.0036730005080441394, 'init_value': -0.16210050880908966, 'ave_value': -0.042319111690042654, 'soft_opc': nan} step=498




2022-04-20 16:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.41 [info     ] FQE_20220420165638: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001547365303499153, 'time_algorithm_update': 0.003455559891390513, 'loss': 0.005111153471754886, 'time_step': 0.003684471888714526, 'init_value': -0.23688077926635742, 'ave_value': -0.07978286609129125, 'soft_opc': nan} step=664




2022-04-20 16:56.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.42 [info     ] FQE_20220420165638: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015538715454469244, 'time_algorithm_update': 0.0035009915570178665, 'loss': 0.00483222918007074, 'time_step': 0.003729790090078331, 'init_value': -0.29017913341522217, 'ave_value': -0.12224061288241599, 'soft_opc': nan} step=830




2022-04-20 16:56.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.42 [info     ] FQE_20220420165638: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001535961426884295, 'time_algorithm_update': 0.003462047462003777, 'loss': 0.004354471102334469, 'time_step': 0.0036841817649014025, 'init_value': -0.3051838278770447, 'ave_value': -0.12328784980450395, 'soft_opc': nan} step=996




2022-04-20 16:56.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.43 [info     ] FQE_20220420165638: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015561695558479033, 'time_algorithm_update': 0.0033425983176173933, 'loss': 0.004261127827374871, 'time_step': 0.0035720718912331455, 'init_value': -0.3248429000377655, 'ave_value': -0.12916540307966104, 'soft_opc': nan} step=1162




2022-04-20 16:56.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.44 [info     ] FQE_20220420165638: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015253474913447737, 'time_algorithm_update': 0.003418816141335361, 'loss': 0.003919945165784244, 'time_step': 0.003641806453107351, 'init_value': -0.3960646092891693, 'ave_value': -0.1883000319571914, 'soft_opc': nan} step=1328




2022-04-20 16:56.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.44 [info     ] FQE_20220420165638: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001517735331891531, 'time_algorithm_update': 0.0034317467586103692, 'loss': 0.003547953650024327, 'time_step': 0.0036475069551582797, 'init_value': -0.43850573897361755, 'ave_value': -0.21476470742265585, 'soft_opc': nan} step=1494




2022-04-20 16:56.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.45 [info     ] FQE_20220420165638: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015506112431905357, 'time_algorithm_update': 0.003448642880083567, 'loss': 0.003502699550022427, 'time_step': 0.0036727563444390356, 'init_value': -0.5101264715194702, 'ave_value': -0.27617886448950246, 'soft_opc': nan} step=1660




2022-04-20 16:56.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.46 [info     ] FQE_20220420165638: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015525932771613798, 'time_algorithm_update': 0.003464484789285315, 'loss': 0.00334898958751281, 'time_step': 0.003691920314926699, 'init_value': -0.5703843832015991, 'ave_value': -0.32282778193989586, 'soft_opc': nan} step=1826




2022-04-20 16:56.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.46 [info     ] FQE_20220420165638: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016100291746208467, 'time_algorithm_update': 0.003533522766756724, 'loss': 0.0031130916722320826, 'time_step': 0.003766907266823642, 'init_value': -0.6161177158355713, 'ave_value': -0.3530437013574851, 'soft_opc': nan} step=1992




2022-04-20 16:56.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.47 [info     ] FQE_20220420165638: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015477961804493363, 'time_algorithm_update': 0.0034359707889786684, 'loss': 0.0030848798229841583, 'time_step': 0.0036615147648087465, 'init_value': -0.6849527359008789, 'ave_value': -0.4027963782711005, 'soft_opc': nan} step=2158




2022-04-20 16:56.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.48 [info     ] FQE_20220420165638: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015405574476862527, 'time_algorithm_update': 0.003404796841632889, 'loss': 0.003029350222211825, 'time_step': 0.003633500581764313, 'init_value': -0.7982548475265503, 'ave_value': -0.48609728374023425, 'soft_opc': nan} step=2324




2022-04-20 16:56.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.48 [info     ] FQE_20220420165638: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015624172716255648, 'time_algorithm_update': 0.003431901874312435, 'loss': 0.0029654513538888015, 'time_step': 0.003658933811877147, 'init_value': -0.8497371077537537, 'ave_value': -0.5275198496710342, 'soft_opc': nan} step=2490




2022-04-20 16:56.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.49 [info     ] FQE_20220420165638: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001547982893794416, 'time_algorithm_update': 0.003426227224878518, 'loss': 0.003103823065813855, 'time_step': 0.0036518559398421324, 'init_value': -0.9005101919174194, 'ave_value': -0.5727188137267624, 'soft_opc': nan} step=2656




2022-04-20 16:56.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.50 [info     ] FQE_20220420165638: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016186467136245175, 'time_algorithm_update': 0.0034864006272281507, 'loss': 0.003222285446581846, 'time_step': 0.0037211768598441617, 'init_value': -0.9834315180778503, 'ave_value': -0.6549263406276434, 'soft_opc': nan} step=2822




2022-04-20 16:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.50 [info     ] FQE_20220420165638: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015845787094300053, 'time_algorithm_update': 0.003010000091001212, 'loss': 0.0033843902305065914, 'time_step': 0.0032392467360898673, 'init_value': -1.05771803855896, 'ave_value': -0.7225160341810536, 'soft_opc': nan} step=2988




2022-04-20 16:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.51 [info     ] FQE_20220420165638: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001481771469116211, 'time_algorithm_update': 0.0029074082891625093, 'loss': 0.003649690879245165, 'time_step': 0.0031261616442576946, 'init_value': -1.0755144357681274, 'ave_value': -0.7270917073127118, 'soft_opc': nan} step=3154




2022-04-20 16:56.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.52 [info     ] FQE_20220420165638: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015446364161479905, 'time_algorithm_update': 0.003517861825874053, 'loss': 0.0036096648125157477, 'time_step': 0.003743211907076548, 'init_value': -1.1571930646896362, 'ave_value': -0.8116553090982609, 'soft_opc': nan} step=3320




2022-04-20 16:56.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.52 [info     ] FQE_20220420165638: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015777134033570807, 'time_algorithm_update': 0.003334497830953943, 'loss': 0.003864472866596946, 'time_step': 0.0035644080265458808, 'init_value': -1.241992712020874, 'ave_value': -0.8756242594874657, 'soft_opc': nan} step=3486




2022-04-20 16:56.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.53 [info     ] FQE_20220420165638: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015669702047325042, 'time_algorithm_update': 0.003521512789898608, 'loss': 0.004278929157923144, 'time_step': 0.0037536649818880013, 'init_value': -1.2573782205581665, 'ave_value': -0.8972858700062241, 'soft_opc': nan} step=3652




2022-04-20 16:56.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.54 [info     ] FQE_20220420165638: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015818210969488304, 'time_algorithm_update': 0.0035025585128600338, 'loss': 0.004281840971614941, 'time_step': 0.0037379279194107973, 'init_value': -1.34358811378479, 'ave_value': -0.9532966159753972, 'soft_opc': nan} step=3818




2022-04-20 16:56.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.54 [info     ] FQE_20220420165638: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001560133623789592, 'time_algorithm_update': 0.0034801457301679864, 'loss': 0.004538839126457404, 'time_step': 0.003708359706832702, 'init_value': -1.3766567707061768, 'ave_value': -0.9877085473623362, 'soft_opc': nan} step=3984




2022-04-20 16:56.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.55 [info     ] FQE_20220420165638: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001566323889307229, 'time_algorithm_update': 0.0034701967813882484, 'loss': 0.004822292087069455, 'time_step': 0.0036976251257471292, 'init_value': -1.4226808547973633, 'ave_value': -1.0128694578803874, 'soft_opc': nan} step=4150




2022-04-20 16:56.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.56 [info     ] FQE_20220420165638: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015800257763230657, 'time_algorithm_update': 0.0036080501165734716, 'loss': 0.0050983312203133685, 'time_step': 0.003837802323950342, 'init_value': -1.5279216766357422, 'ave_value': -1.1053068517363287, 'soft_opc': nan} step=4316




2022-04-20 16:56.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.56 [info     ] FQE_20220420165638: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015705033957240093, 'time_algorithm_update': 0.003387125141649361, 'loss': 0.005474806082973549, 'time_step': 0.0036160830991813935, 'init_value': -1.5477867126464844, 'ave_value': -1.1272635817057914, 'soft_opc': nan} step=4482




2022-04-20 16:56.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.57 [info     ] FQE_20220420165638: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001606524708759354, 'time_algorithm_update': 0.0034798943852803795, 'loss': 0.005789758520430874, 'time_step': 0.0037127015102340513, 'init_value': -1.6315332651138306, 'ave_value': -1.200610143791985, 'soft_opc': nan} step=4648




2022-04-20 16:56.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.58 [info     ] FQE_20220420165638: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015615124300301793, 'time_algorithm_update': 0.003415782767606069, 'loss': 0.006050046574722706, 'time_step': 0.0036422502563660405, 'init_value': -1.6858525276184082, 'ave_value': -1.234720786526665, 'soft_opc': nan} step=4814




2022-04-20 16:56.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.59 [info     ] FQE_20220420165638: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001592320131968303, 'time_algorithm_update': 0.003500715795769749, 'loss': 0.006372737201738609, 'time_step': 0.003738373158925987, 'init_value': -1.692746877670288, 'ave_value': -1.2164558368961553, 'soft_opc': nan} step=4980




2022-04-20 16:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:56.59 [info     ] FQE_20220420165638: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001576176608901426, 'time_algorithm_update': 0.0033608488289706678, 'loss': 0.006663012443494658, 'time_step': 0.003588853112186294, 'init_value': -1.7279136180877686, 'ave_value': -1.2765855609766534, 'soft_opc': nan} step=5146




2022-04-20 16:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.00 [info     ] FQE_20220420165638: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015552072639924935, 'time_algorithm_update': 0.003623551632984575, 'loss': 0.0070134988629696774, 'time_step': 0.0038514740495796665, 'init_value': -1.801023006439209, 'ave_value': -1.3396243730643849, 'soft_opc': nan} step=5312




2022-04-20 16:57.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.01 [info     ] FQE_20220420165638: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015984673097909214, 'time_algorithm_update': 0.003462956612368664, 'loss': 0.007099953256351088, 'time_step': 0.0036942427416881882, 'init_value': -1.8709594011306763, 'ave_value': -1.3994594898309793, 'soft_opc': nan} step=5478




2022-04-20 16:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.01 [info     ] FQE_20220420165638: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016021728515625, 'time_algorithm_update': 0.0035868796957544535, 'loss': 0.007549250357453318, 'time_step': 0.003819610699113593, 'init_value': -1.8874365091323853, 'ave_value': -1.4005754525462786, 'soft_opc': nan} step=5644




2022-04-20 16:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.02 [info     ] FQE_20220420165638: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001603163868547922, 'time_algorithm_update': 0.003486710858632283, 'loss': 0.007778565832399707, 'time_step': 0.0037228141922548592, 'init_value': -1.9317173957824707, 'ave_value': -1.431049631079575, 'soft_opc': nan} step=5810




2022-04-20 16:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.03 [info     ] FQE_20220420165638: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015880257250314736, 'time_algorithm_update': 0.003543697207807058, 'loss': 0.007976615169584885, 'time_step': 0.003776538802916745, 'init_value': -1.9284942150115967, 'ave_value': -1.416214720045661, 'soft_opc': nan} step=5976




2022-04-20 16:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.03 [info     ] FQE_20220420165638: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015475663794092386, 'time_algorithm_update': 0.0035235048776649566, 'loss': 0.008535542757480395, 'time_step': 0.0037530129214367234, 'init_value': -2.0166709423065186, 'ave_value': -1.4693804767821823, 'soft_opc': nan} step=6142




2022-04-20 16:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.04 [info     ] FQE_20220420165638: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015385897762804147, 'time_algorithm_update': 0.0035642126956617975, 'loss': 0.008632057227183089, 'time_step': 0.0037948065493480267, 'init_value': -2.042287588119507, 'ave_value': -1.500439239068477, 'soft_opc': nan} step=6308




2022-04-20 16:57.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.05 [info     ] FQE_20220420165638: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001576765474066677, 'time_algorithm_update': 0.0035193698951996952, 'loss': 0.009004253036404276, 'time_step': 0.0037493375410516576, 'init_value': -2.0639967918395996, 'ave_value': -1.5282336868815594, 'soft_opc': nan} step=6474




2022-04-20 16:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.05 [info     ] FQE_20220420165638: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015982518713158298, 'time_algorithm_update': 0.00346355696758592, 'loss': 0.00933006988257641, 'time_step': 0.0036979425384337642, 'init_value': -2.115598201751709, 'ave_value': -1.5288236643951219, 'soft_opc': nan} step=6640




2022-04-20 16:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.06 [info     ] FQE_20220420165638: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015592144196292004, 'time_algorithm_update': 0.003558779337319983, 'loss': 0.009425753040284097, 'time_step': 0.003791109625115452, 'init_value': -2.2012624740600586, 'ave_value': -1.587897373403649, 'soft_opc': nan} step=6806




2022-04-20 16:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.07 [info     ] FQE_20220420165638: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001555523240422628, 'time_algorithm_update': 0.003392894584012319, 'loss': 0.009769578805159643, 'time_step': 0.003620768167886389, 'init_value': -2.217182159423828, 'ave_value': -1.602446429307262, 'soft_opc': nan} step=6972




2022-04-20 16:57.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.07 [info     ] FQE_20220420165638: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015816343836037508, 'time_algorithm_update': 0.0035143214536000446, 'loss': 0.010204020271874425, 'time_step': 0.0037430338112704725, 'init_value': -2.261258363723755, 'ave_value': -1.6168989537703293, 'soft_opc': nan} step=7138




2022-04-20 16:57.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.08 [info     ] FQE_20220420165638: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001544736954103033, 'time_algorithm_update': 0.003458225583455649, 'loss': 0.010639151846513107, 'time_step': 0.003686185342719756, 'init_value': -2.4240546226501465, 'ave_value': -1.7685876763417376, 'soft_opc': nan} step=7304




2022-04-20 16:57.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.09 [info     ] FQE_20220420165638: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015541875218770592, 'time_algorithm_update': 0.003584749727364046, 'loss': 0.010793799956971556, 'time_step': 0.0038139303046536735, 'init_value': -2.4084455966949463, 'ave_value': -1.7388135520187584, 'soft_opc': nan} step=7470




2022-04-20 16:57.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.10 [info     ] FQE_20220420165638: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015604065125247082, 'time_algorithm_update': 0.0034485883023365436, 'loss': 0.011278862610854859, 'time_step': 0.003679403339523867, 'init_value': -2.439696788787842, 'ave_value': -1.7421745009518959, 'soft_opc': nan} step=7636




2022-04-20 16:57.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.10 [info     ] FQE_20220420165638: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015456417956984187, 'time_algorithm_update': 0.0034868358129478364, 'loss': 0.011808352753890297, 'time_step': 0.0037129715264561667, 'init_value': -2.549335479736328, 'ave_value': -1.88026348459828, 'soft_opc': nan} step=7802




2022-04-20 16:57.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.11 [info     ] FQE_20220420165638: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015553508896425547, 'time_algorithm_update': 0.0034441114908241362, 'loss': 0.01228782725983288, 'time_step': 0.0036714163171239645, 'init_value': -2.555370330810547, 'ave_value': -1.8515493116232458, 'soft_opc': nan} step=7968




2022-04-20 16:57.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.12 [info     ] FQE_20220420165638: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016031207808529037, 'time_algorithm_update': 0.0035335773445037476, 'loss': 0.01233581538979772, 'time_step': 0.003766727734761066, 'init_value': -2.560739278793335, 'ave_value': -1.8753544488535807, 'soft_opc': nan} step=8134




2022-04-20 16:57.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 16:57.12 [info     ] FQE_20220420165638: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015455268951783697, 'time_algorithm_update': 0.003417940024869988, 'loss': 0.012752800863600863, 'time_step': 0.0036433518651020095, 'init_value': -2.6101627349853516, 'ave_value': -1.926321794434979, 'soft_opc': nan} step=8300




2022-04-20 16:57.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165638/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 16:57.13 [info     ] Directory is created at d3rlpy_logs/FQE_20220420165713
2022-04-20 16:57.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:57.13 [debug    ] Building models...
2022-04-20 16:57.13 [debug    ] Models have been built.
2022-04-20 16:57.13 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420165713/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 16:57.14 [info     ] FQE_20220420165713: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00015665981131540217, 'time_algorithm_update': 0.0034141728575800504, 'loss': 0.024490189578541568, 'time_step': 0.003641295097243618, 'init_value': -1.147078514099121, 'ave_value': -1.1562648130429758, 'soft_opc': nan} step=355




2022-04-20 16:57.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.15 [info     ] FQE_20220420165713: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016298629868198447, 'time_algorithm_update': 0.0034939107760577136, 'loss': 0.02269148772293833, 'time_step': 0.003729581832885742, 'init_value': -2.3283915519714355, 'ave_value': -2.3672462654083084, 'soft_opc': nan} step=710




2022-04-20 16:57.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.17 [info     ] FQE_20220420165713: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016519519644723812, 'time_algorithm_update': 0.003517553168283382, 'loss': 0.024643721431493758, 'time_step': 0.0037603230543539557, 'init_value': -2.8737075328826904, 'ave_value': -2.910651764645693, 'soft_opc': nan} step=1065




2022-04-20 16:57.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.18 [info     ] FQE_20220420165713: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001641112314143651, 'time_algorithm_update': 0.0035080761976645026, 'loss': 0.029532500511218963, 'time_step': 0.0037447150324431944, 'init_value': -3.8782196044921875, 'ave_value': -3.9172270372874336, 'soft_opc': nan} step=1420




2022-04-20 16:57.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.20 [info     ] FQE_20220420165713: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016428517623686456, 'time_algorithm_update': 0.0034437790722914147, 'loss': 0.03611595774935165, 'time_step': 0.003679412519428092, 'init_value': -4.477503299713135, 'ave_value': -4.502173540552303, 'soft_opc': nan} step=1775




2022-04-20 16:57.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.21 [info     ] FQE_20220420165713: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016240402006767164, 'time_algorithm_update': 0.003481075797282474, 'loss': 0.04576459856257892, 'time_step': 0.0037167925230214296, 'init_value': -5.368110179901123, 'ave_value': -5.383421017565169, 'soft_opc': nan} step=2130




2022-04-20 16:57.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.23 [info     ] FQE_20220420165713: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00016870632977552817, 'time_algorithm_update': 0.0034123326691103653, 'loss': 0.05695264194108231, 'time_step': 0.003655819154121506, 'init_value': -5.883232116699219, 'ave_value': -5.95112751696254, 'soft_opc': nan} step=2485




2022-04-20 16:57.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.24 [info     ] FQE_20220420165713: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.000161903676852374, 'time_algorithm_update': 0.0034668821684071716, 'loss': 0.07207445524055774, 'time_step': 0.003704431694997868, 'init_value': -6.6764445304870605, 'ave_value': -6.775915033875592, 'soft_opc': nan} step=2840




2022-04-20 16:57.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.25 [info     ] FQE_20220420165713: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00016668883847518706, 'time_algorithm_update': 0.0034476958530049928, 'loss': 0.08562178364364614, 'time_step': 0.0036910755533567617, 'init_value': -7.151076316833496, 'ave_value': -7.375757805544744, 'soft_opc': nan} step=3195




2022-04-20 16:57.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.27 [info     ] FQE_20220420165713: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00015757251793230084, 'time_algorithm_update': 0.00346604199476645, 'loss': 0.10084649618505172, 'time_step': 0.0036927485130202604, 'init_value': -7.657676696777344, 'ave_value': -8.039586942034031, 'soft_opc': nan} step=3550




2022-04-20 16:57.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.28 [info     ] FQE_20220420165713: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016616431760116363, 'time_algorithm_update': 0.0034422458057672203, 'loss': 0.11268063442597927, 'time_step': 0.003681306436028279, 'init_value': -8.363243103027344, 'ave_value': -8.943145233791615, 'soft_opc': nan} step=3905




2022-04-20 16:57.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.30 [info     ] FQE_20220420165713: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00016501990842147612, 'time_algorithm_update': 0.0035029451612015846, 'loss': 0.1304218248949504, 'time_step': 0.0037431522154472243, 'init_value': -8.556787490844727, 'ave_value': -9.403357603528478, 'soft_opc': nan} step=4260




2022-04-20 16:57.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.31 [info     ] FQE_20220420165713: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.000166747939418739, 'time_algorithm_update': 0.0034630023257833133, 'loss': 0.1446163083885757, 'time_step': 0.0037092719279544456, 'init_value': -9.014094352722168, 'ave_value': -10.067721346423433, 'soft_opc': nan} step=4615




2022-04-20 16:57.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.32 [info     ] FQE_20220420165713: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00016298495547872194, 'time_algorithm_update': 0.003456861872068593, 'loss': 0.15521908897749134, 'time_step': 0.0036922139181217676, 'init_value': -8.978900909423828, 'ave_value': -10.312845825700226, 'soft_opc': nan} step=4970




2022-04-20 16:57.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.34 [info     ] FQE_20220420165713: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016558674019826969, 'time_algorithm_update': 0.003470729774152729, 'loss': 0.1797608902704128, 'time_step': 0.0037086560692585688, 'init_value': -9.341633796691895, 'ave_value': -10.85896938155996, 'soft_opc': nan} step=5325




2022-04-20 16:57.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.35 [info     ] FQE_20220420165713: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.0001633053094568387, 'time_algorithm_update': 0.0034572849810962946, 'loss': 0.19650013188136295, 'time_step': 0.003696609550798443, 'init_value': -9.641806602478027, 'ave_value': -11.377722031706549, 'soft_opc': nan} step=5680




2022-04-20 16:57.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.37 [info     ] FQE_20220420165713: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016319046557789117, 'time_algorithm_update': 0.0034782067151136803, 'loss': 0.22078962123519938, 'time_step': 0.003715961080201915, 'init_value': -9.812932968139648, 'ave_value': -11.914791172677639, 'soft_opc': nan} step=6035




2022-04-20 16:57.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.38 [info     ] FQE_20220420165713: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001641542139187665, 'time_algorithm_update': 0.0034960834073348786, 'loss': 0.23590514543400684, 'time_step': 0.0037342877455160653, 'init_value': -9.859444618225098, 'ave_value': -12.094449268171187, 'soft_opc': nan} step=6390




2022-04-20 16:57.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.40 [info     ] FQE_20220420165713: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00016581306994800836, 'time_algorithm_update': 0.0034955797061114245, 'loss': 0.2544904453443809, 'time_step': 0.003738784118437431, 'init_value': -9.964770317077637, 'ave_value': -12.583435314035516, 'soft_opc': nan} step=6745




2022-04-20 16:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.41 [info     ] FQE_20220420165713: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016402459480393102, 'time_algorithm_update': 0.0034879012846610914, 'loss': 0.27609469063160286, 'time_step': 0.0037275166578695806, 'init_value': -10.046632766723633, 'ave_value': -12.857968678629383, 'soft_opc': nan} step=7100




2022-04-20 16:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.42 [info     ] FQE_20220420165713: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001666660040197238, 'time_algorithm_update': 0.0034566033054405534, 'loss': 0.2951352955852176, 'time_step': 0.0036990817164031554, 'init_value': -10.018877983093262, 'ave_value': -13.055863240127245, 'soft_opc': nan} step=7455




2022-04-20 16:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.44 [info     ] FQE_20220420165713: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016250073070257483, 'time_algorithm_update': 0.0034179862116424133, 'loss': 0.31226538768116857, 'time_step': 0.003654445728785555, 'init_value': -10.059174537658691, 'ave_value': -13.263269799630345, 'soft_opc': nan} step=7810




2022-04-20 16:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.45 [info     ] FQE_20220420165713: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001649896863480689, 'time_algorithm_update': 0.003459841768506547, 'loss': 0.33420419374507077, 'time_step': 0.0036999568133287025, 'init_value': -10.168830871582031, 'ave_value': -13.60924713378017, 'soft_opc': nan} step=8165




2022-04-20 16:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.47 [info     ] FQE_20220420165713: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016373849250900913, 'time_algorithm_update': 0.0035016187479798224, 'loss': 0.3569424342154197, 'time_step': 0.0037391729757819377, 'init_value': -10.274065017700195, 'ave_value': -13.925953923142785, 'soft_opc': nan} step=8520




2022-04-20 16:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.48 [info     ] FQE_20220420165713: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016713008074693278, 'time_algorithm_update': 0.003521721799608687, 'loss': 0.38604641577727355, 'time_step': 0.0037658013088602417, 'init_value': -10.474832534790039, 'ave_value': -14.15357331961029, 'soft_opc': nan} step=8875




2022-04-20 16:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.50 [info     ] FQE_20220420165713: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016492521259146677, 'time_algorithm_update': 0.003476446447238116, 'loss': 0.41748354162653567, 'time_step': 0.0037166736495326943, 'init_value': -10.699003219604492, 'ave_value': -14.539701008231905, 'soft_opc': nan} step=9230




2022-04-20 16:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.51 [info     ] FQE_20220420165713: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00016435300800162302, 'time_algorithm_update': 0.0034617457591312032, 'loss': 0.44359461928547267, 'time_step': 0.003700631772968131, 'init_value': -10.65105152130127, 'ave_value': -14.61868992882806, 'soft_opc': nan} step=9585




2022-04-20 16:57.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.52 [info     ] FQE_20220420165713: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001657136729065801, 'time_algorithm_update': 0.0034837037744656417, 'loss': 0.45489589093961347, 'time_step': 0.0037242473011285487, 'init_value': -10.87321662902832, 'ave_value': -14.978989358042083, 'soft_opc': nan} step=9940




2022-04-20 16:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.54 [info     ] FQE_20220420165713: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016859215749821192, 'time_algorithm_update': 0.0035088250334833707, 'loss': 0.48162599223285496, 'time_step': 0.0037554875226087974, 'init_value': -11.055262565612793, 'ave_value': -15.246852986384159, 'soft_opc': nan} step=10295




2022-04-20 16:57.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.55 [info     ] FQE_20220420165713: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001651931816423443, 'time_algorithm_update': 0.003478698327507771, 'loss': 0.5189895082737358, 'time_step': 0.003719100817828111, 'init_value': -11.365476608276367, 'ave_value': -15.685143987746235, 'soft_opc': nan} step=10650




2022-04-20 16:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.57 [info     ] FQE_20220420165713: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016652429607552543, 'time_algorithm_update': 0.003433650648090201, 'loss': 0.5476176262006793, 'time_step': 0.003675292243420238, 'init_value': -11.978553771972656, 'ave_value': -16.301104766045107, 'soft_opc': nan} step=11005




2022-04-20 16:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:57.58 [info     ] FQE_20220420165713: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016385535119285046, 'time_algorithm_update': 0.0034480779943331864, 'loss': 0.5971263638054821, 'time_step': 0.0036871668318627588, 'init_value': -12.494973182678223, 'ave_value': -16.924691951934587, 'soft_opc': nan} step=11360




2022-04-20 16:57.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.00 [info     ] FQE_20220420165713: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016728925033354422, 'time_algorithm_update': 0.0034277842078410405, 'loss': 0.6316959463794466, 'time_step': 0.003670177996998102, 'init_value': -12.822880744934082, 'ave_value': -17.45118404411671, 'soft_opc': nan} step=11715




2022-04-20 16:58.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.01 [info     ] FQE_20220420165713: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00018087575133417694, 'time_algorithm_update': 0.004218072622594699, 'loss': 0.6625452674243232, 'time_step': 0.004474553255967691, 'init_value': -12.96202564239502, 'ave_value': -17.8128317540092, 'soft_opc': nan} step=12070




2022-04-20 16:58.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.03 [info     ] FQE_20220420165713: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016127640092876595, 'time_algorithm_update': 0.003561477929773465, 'loss': 0.6655864349868096, 'time_step': 0.0037959233136244225, 'init_value': -12.803038597106934, 'ave_value': -17.81534618262964, 'soft_opc': nan} step=12425




2022-04-20 16:58.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.04 [info     ] FQE_20220420165713: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00016490237813600352, 'time_algorithm_update': 0.004410459625888878, 'loss': 0.6916272459682864, 'time_step': 0.004650290583221006, 'init_value': -12.943781852722168, 'ave_value': -17.975379425253685, 'soft_opc': nan} step=12780




2022-04-20 16:58.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.06 [info     ] FQE_20220420165713: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016805689099808814, 'time_algorithm_update': 0.0043201721889871945, 'loss': 0.7312602728185519, 'time_step': 0.00456394074668347, 'init_value': -13.767621994018555, 'ave_value': -18.982728195011116, 'soft_opc': nan} step=13135




2022-04-20 16:58.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.08 [info     ] FQE_20220420165713: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016477477382606184, 'time_algorithm_update': 0.004044740972384601, 'loss': 0.7328896940856332, 'time_step': 0.004286770081855881, 'init_value': -13.957818984985352, 'ave_value': -19.231147625395113, 'soft_opc': nan} step=13490




2022-04-20 16:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.10 [info     ] FQE_20220420165713: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016634162043181944, 'time_algorithm_update': 0.004523650693221831, 'loss': 0.7333158846682226, 'time_step': 0.004764480993781291, 'init_value': -14.487167358398438, 'ave_value': -19.690516660360565, 'soft_opc': nan} step=13845




2022-04-20 16:58.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.11 [info     ] FQE_20220420165713: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.0001447543291978433, 'time_algorithm_update': 0.0037250619539072817, 'loss': 0.7421232713989808, 'time_step': 0.003935586230855593, 'init_value': -14.30507755279541, 'ave_value': -19.65031775478729, 'soft_opc': nan} step=14200




2022-04-20 16:58.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.13 [info     ] FQE_20220420165713: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00014731111660809583, 'time_algorithm_update': 0.004114852824681242, 'loss': 0.7365398864615971, 'time_step': 0.004331361743765818, 'init_value': -14.571091651916504, 'ave_value': -20.059913044914776, 'soft_opc': nan} step=14555




2022-04-20 16:58.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.14 [info     ] FQE_20220420165713: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00013951315006739657, 'time_algorithm_update': 0.0036067546253472985, 'loss': 0.7348340637194859, 'time_step': 0.003809546752714775, 'init_value': -14.493403434753418, 'ave_value': -20.08365021120167, 'soft_opc': nan} step=14910




2022-04-20 16:58.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.16 [info     ] FQE_20220420165713: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00014682286222216108, 'time_algorithm_update': 0.0038102787984928615, 'loss': 0.7268597481641131, 'time_step': 0.004025180574873804, 'init_value': -14.7569580078125, 'ave_value': -20.20956349737434, 'soft_opc': nan} step=15265




2022-04-20 16:58.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.18 [info     ] FQE_20220420165713: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001616323497933401, 'time_algorithm_update': 0.004455904893472161, 'loss': 0.7338731167601867, 'time_step': 0.004690978224848358, 'init_value': -15.148930549621582, 'ave_value': -20.819016167456272, 'soft_opc': nan} step=15620




2022-04-20 16:58.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.19 [info     ] FQE_20220420165713: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016502931084431393, 'time_algorithm_update': 0.0041226890725149234, 'loss': 0.7238927855248183, 'time_step': 0.004362177513015103, 'init_value': -14.977521896362305, 'ave_value': -20.620017753259557, 'soft_opc': nan} step=15975




2022-04-20 16:58.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.21 [info     ] FQE_20220420165713: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00016214746824452575, 'time_algorithm_update': 0.004585609973316461, 'loss': 0.6843206515929229, 'time_step': 0.004826525567283093, 'init_value': -15.498075485229492, 'ave_value': -21.293558094588178, 'soft_opc': nan} step=16330




2022-04-20 16:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.23 [info     ] FQE_20220420165713: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016438457327829281, 'time_algorithm_update': 0.004132590495364767, 'loss': 0.6680605648374054, 'time_step': 0.004371393902201048, 'init_value': -15.400063514709473, 'ave_value': -21.356612094433228, 'soft_opc': nan} step=16685




2022-04-20 16:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.24 [info     ] FQE_20220420165713: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016590507937149263, 'time_algorithm_update': 0.004300978485967072, 'loss': 0.6730106911451464, 'time_step': 0.004541549548296862, 'init_value': -15.831767082214355, 'ave_value': -21.904534428825595, 'soft_opc': nan} step=17040




2022-04-20 16:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.26 [info     ] FQE_20220420165713: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.0001644953875474527, 'time_algorithm_update': 0.004557276443696358, 'loss': 0.6522462661522375, 'time_step': 0.004795650025488625, 'init_value': -15.913311958312988, 'ave_value': -21.958968430933957, 'soft_opc': nan} step=17395




2022-04-20 16:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 16:58.28 [info     ] FQE_20220420165713: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016426569978955765, 'time_algorithm_update': 0.004141360269465917, 'loss': 0.645566606867901, 'time_step': 0.004382274520229286, 'init_value': -15.508828163146973, 'ave_value': -21.842391767890756, 'soft_opc': nan} step=17750




2022-04-20 16:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420165713/model_17750.pt
search iteration:  10
using hyper params:  [0.0010789572397939136, 0.0008928585003157581, 5.39336849809646e-05, 7]
2022-04-20 16:58.28 [debug    ] RoundIterator is selected.
2022-04-20 16:58.28 [info     ] Directory is created at d3rlpy_logs/CQL_20220420165828
2022-04-20 16:58.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 16:58.28 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 16:58.28 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420165828/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0010789572397939136, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.37 [info     ] CQL_20220420165828: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003923442628648546, 'time_algorithm_update': 0.025761982153730784, 'temp_loss': 4.557199071722421, 'temp': 0.9911090118146082, 'alpha_loss': -19.696445085848982, 'alpha': 1.0180551611889175, 'critic_loss': 103.53263009500782, 'actor_loss': 5.048684066172406, 'time_step': 0.026255231154592412, 'td_error': 10.655407741939245, 'init_value': -7.783585548400879, 'ave_value': -6.740000589143586} step=342
2022-04-20 16:58.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.47 [info     ] CQL_20220420165828: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003876595469246134, 'time_algorithm_update': 0.025425940229181657, 'temp_loss': 4.257122212683249, 'temp': 0.9740807208401417, 'alpha_loss': -17.922090772996867, 'alpha': 1.0545088408286112, 'critic_loss': 74.36921377349319, 'actor_loss': 8.84801203047323, 'time_step': 0.02591897311963533, 'td_error': 7.235131954294648, 'init_value': -14.026971817016602, 'ave_value': -11.34329800692496} step=684
2022-04-20 16:58.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:58.56 [info     ] CQL_20220420165828: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003900562810618975, 'time_algorithm_update': 0.02578008384035345, 'temp_loss': 4.0462909874163175, 'temp': 0.9571413939807847, 'alpha_loss': -13.700665203451413, 'alpha': 1.0852478986595109, 'critic_loss': 74.16699557834201, 'actor_loss': 15.070723823636596, 'time_step': 0.026270326815153424, 'td_error': 6.2708102811082895, 'init_value': -23.65363121032715, 'ave_value': -17.95243634397919} step=1026
2022-04-20 16:58.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.05 [info     ] CQL_20220420165828: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00036834136784425256, 'time_algorithm_update': 0.024420925051148176, 'temp_loss': 3.841808993913974, 'temp': 0.9407883452044593, 'alpha_loss': -10.59561180092438, 'alpha': 1.1124488509189316, 'critic_loss': 71.68454238266973, 'actor_loss': 22.147732037549826, 'time_step': 0.024885920056125575, 'td_error': 8.358975172799095, 'init_value': -34.60062026977539, 'ave_value': -24.31181074839186} step=1368
2022-04-20 16:59.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.14 [info     ] CQL_20220420165828: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003856664512589661, 'time_algorithm_update': 0.02565541811156691, 'temp_loss': 3.5609554324233743, 'temp': 0.9251018752131546, 'alpha_loss': -8.165373308616772, 'alpha': 1.1371854749339365, 'critic_loss': 73.62259032712345, 'actor_loss': 29.07958911873444, 'time_step': 0.026140795813666448, 'td_error': 12.290811656065214, 'init_value': -44.26432418823242, 'ave_value': -29.582399405396735} step=1710
2022-04-20 16:59.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.24 [info     ] CQL_20220420165828: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003964113213165462, 'time_algorithm_update': 0.02569937845419722, 'temp_loss': 3.2547541731282283, 'temp': 0.9103023678238629, 'alpha_loss': -6.557324003755, 'alpha': 1.1597643592204268, 'critic_loss': 77.73992616530748, 'actor_loss': 35.08224741060134, 'time_step': 0.02619473139444987, 'td_error': 16.31459020144571, 'init_value': -52.83000564575195, 'ave_value': -34.77826051362933} step=2052
2022-04-20 16:59.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.33 [info     ] CQL_20220420165828: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00039660721494440444, 'time_algorithm_update': 0.025735718464990804, 'temp_loss': 3.019877472816155, 'temp': 0.8961553540494707, 'alpha_loss': -5.526373639441373, 'alpha': 1.1816583836984913, 'critic_loss': 83.57461538928294, 'actor_loss': 40.24112441525822, 'time_step': 0.02623304916404144, 'td_error': 20.54086841328447, 'init_value': -60.4793701171875, 'ave_value': -39.54647551475814} step=2394
2022-04-20 16:59.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.43 [info     ] CQL_20220420165828: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00039321567580016733, 'time_algorithm_update': 0.025977298530221682, 'temp_loss': 2.8222069928520606, 'temp': 0.8824460272900543, 'alpha_loss': -4.723547264846445, 'alpha': 1.2033271831378602, 'critic_loss': 90.52623369540387, 'actor_loss': 44.840365292733175, 'time_step': 0.02646956457729228, 'td_error': 24.495727122372564, 'init_value': -66.95564270019531, 'ave_value': -43.0225964699256} step=2736
2022-04-20 16:59.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 16:59.52 [info     ] CQL_20220420165828: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00038914234317534153, 'time_algorithm_update': 0.025789634526124476, 'temp_loss': 2.65206225573668, 'temp': 0.8690380136869107, 'alpha_loss': -4.089061292988515, 'alpha': 1.2250206658714695, 'critic_loss': 98.32258824577109, 'actor_loss': 48.8741521556475, 'time_step': 0.026278906398349337, 'td_error': 28.30962243276919, 'init_value': -72.52566528320312, 'ave_value': -46.42757788902736} step=3078
2022-04-20 16:59.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.02 [info     ] CQL_20220420165828: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003965061310439082, 'time_algorithm_update': 0.025784962358530502, 'temp_loss': 2.521203114972477, 'temp': 0.8558431539619178, 'alpha_loss': -3.59534262948566, 'alpha': 1.2470409702836422, 'critic_loss': 106.39057433814334, 'actor_loss': 52.377549154716625, 'time_step': 0.026278865267658793, 'td_error': 32.444560885282606, 'init_value': -77.1522216796875, 'ave_value': -49.141433421905646} step=3420
2022-04-20 17:00.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.11 [info     ] CQL_20220420165828: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00038890113607484696, 'time_algorithm_update': 0.025587726754751818, 'temp_loss': 2.361406555998395, 'temp': 0.8428815845509021, 'alpha_loss': -3.089887447175924, 'alpha': 1.2691218772826836, 'critic_loss': 114.30295752363595, 'actor_loss': 55.499971869396184, 'time_step': 0.02607593271467421, 'td_error': 36.00049443451478, 'init_value': -81.3094253540039, 'ave_value': -51.9809660480742} step=3762
2022-04-20 17:00.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.20 [info     ] CQL_20220420165828: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003871450647276047, 'time_algorithm_update': 0.025935250416136625, 'temp_loss': 2.2447881245473673, 'temp': 0.8301249758193368, 'alpha_loss': -2.701106008686866, 'alpha': 1.2912579353092706, 'critic_loss': 121.76508262143497, 'actor_loss': 58.192293222884686, 'time_step': 0.02641952595515558, 'td_error': 39.142836160567676, 'init_value': -85.13314056396484, 'ave_value': -54.11700867059666} step=4104
2022-04-20 17:00.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.30 [info     ] CQL_20220420165828: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00039433735853050187, 'time_algorithm_update': 0.02543661259768302, 'temp_loss': 2.1290152641067728, 'temp': 0.8175209166362272, 'alpha_loss': -2.3213121557488425, 'alpha': 1.3136393100197552, 'critic_loss': 129.40293464883726, 'actor_loss': 60.551561009814165, 'time_step': 0.02593352362426401, 'td_error': 41.77933375066787, 'init_value': -87.73245239257812, 'ave_value': -56.12799199454621} step=4446
2022-04-20 17:00.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.39 [info     ] CQL_20220420165828: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00038883630295245966, 'time_algorithm_update': 0.02577681081336841, 'temp_loss': 2.0334331166674517, 'temp': 0.8050448596477509, 'alpha_loss': -1.9644711512966049, 'alpha': 1.3352831354615284, 'critic_loss': 137.1076324641356, 'actor_loss': 62.56623969719424, 'time_step': 0.026268637668319612, 'td_error': 44.61093431788511, 'init_value': -90.67110443115234, 'ave_value': -57.76585875960487} step=4788
2022-04-20 17:00.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.48 [info     ] CQL_20220420165828: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00039279530620017246, 'time_algorithm_update': 0.02548995004062764, 'temp_loss': 1.9167488102327312, 'temp': 0.7927976440616519, 'alpha_loss': -1.6397110971006734, 'alpha': 1.356455135415172, 'critic_loss': 143.4903200383772, 'actor_loss': 64.33339987860785, 'time_step': 0.025982071084585803, 'td_error': 46.58299581696142, 'init_value': -92.95751190185547, 'ave_value': -59.58219119304674} step=5130
2022-04-20 17:00.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:00.58 [info     ] CQL_20220420165828: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003885449024668911, 'time_algorithm_update': 0.02575669999708209, 'temp_loss': 1.8191844740806267, 'temp': 0.7807274227253875, 'alpha_loss': -1.265863008323338, 'alpha': 1.3761915264770999, 'critic_loss': 149.69696716397826, 'actor_loss': 65.87538545173511, 'time_step': 0.02624610223268208, 'td_error': 48.96096431167913, 'init_value': -94.63433837890625, 'ave_value': -60.78522535102324} step=5472
2022-04-20 17:00.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.07 [info     ] CQL_20220420165828: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003811664748610112, 'time_algorithm_update': 0.023837039345189145, 'temp_loss': 1.7248332803709465, 'temp': 0.7688428743889457, 'alpha_loss': -0.9044265710579896, 'alpha': 1.3934461861325984, 'critic_loss': 156.00373126470555, 'actor_loss': 67.24280488979049, 'time_step': 0.024317413045648942, 'td_error': 50.38055434868499, 'init_value': -95.8631362915039, 'ave_value': -61.55951239249996} step=5814
2022-04-20 17:01.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.15 [info     ] CQL_20220420165828: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00038242758366099574, 'time_algorithm_update': 0.024124008173133895, 'temp_loss': 1.631224280560923, 'temp': 0.7572068253107238, 'alpha_loss': -0.5783405514043887, 'alpha': 1.4062651489910327, 'critic_loss': 161.7011720311572, 'actor_loss': 68.36672076844333, 'time_step': 0.024606683798003615, 'td_error': 52.2899306005702, 'init_value': -97.12965393066406, 'ave_value': -62.34478422650984} step=6156
2022-04-20 17:01.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.24 [info     ] CQL_20220420165828: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00038583655106393913, 'time_algorithm_update': 0.02411661162013896, 'temp_loss': 1.5548093207398352, 'temp': 0.7456520353144372, 'alpha_loss': -0.2328144264596716, 'alpha': 1.4149247833162721, 'critic_loss': 166.84836774681045, 'actor_loss': 69.41726665608367, 'time_step': 0.024606157464590685, 'td_error': 53.91722103399673, 'init_value': -98.93639373779297, 'ave_value': -64.08581146865512} step=6498
2022-04-20 17:01.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.33 [info     ] CQL_20220420165828: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00038845566978231507, 'time_algorithm_update': 0.024176956617344193, 'temp_loss': 1.4540998775359484, 'temp': 0.7343615300474111, 'alpha_loss': 0.09465087945596008, 'alpha': 1.4172006803646422, 'critic_loss': 172.09138618156925, 'actor_loss': 70.26474650422035, 'time_step': 0.024665911295260603, 'td_error': 54.0282726558855, 'init_value': -97.88984680175781, 'ave_value': -63.5138646916139} step=6840
2022-04-20 17:01.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.42 [info     ] CQL_20220420165828: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003889157758121602, 'time_algorithm_update': 0.024158638123183224, 'temp_loss': 1.3694751883110805, 'temp': 0.7233965154628308, 'alpha_loss': 0.3813755866141829, 'alpha': 1.4095264662776077, 'critic_loss': 177.58168110094573, 'actor_loss': 70.9327804610046, 'time_step': 0.024650101773222986, 'td_error': 55.27672014974719, 'init_value': -99.50064086914062, 'ave_value': -65.11608137830824} step=7182
2022-04-20 17:01.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.51 [info     ] CQL_20220420165828: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003885644221166421, 'time_algorithm_update': 0.024004611355519435, 'temp_loss': 1.301486755672254, 'temp': 0.7124917176034715, 'alpha_loss': 0.6907384059036543, 'alpha': 1.3955353634399281, 'critic_loss': 182.11116063525105, 'actor_loss': 71.58059802808259, 'time_step': 0.024493609952647783, 'td_error': 56.20734231917354, 'init_value': -99.40534973144531, 'ave_value': -65.04692809421199} step=7524
2022-04-20 17:01.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:01.59 [info     ] CQL_20220420165828: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00038798162114550496, 'time_algorithm_update': 0.024359941482543945, 'temp_loss': 1.2277027518428556, 'temp': 0.7018280487660079, 'alpha_loss': 0.9085265349062999, 'alpha': 1.3710871948833354, 'critic_loss': 186.85097601818063, 'actor_loss': 72.08273756016068, 'time_step': 0.024851167411134953, 'td_error': 56.91672849036061, 'init_value': -99.78352355957031, 'ave_value': -65.47434118788152} step=7866
2022-04-20 17:01.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.08 [info     ] CQL_20220420165828: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003854245470281233, 'time_algorithm_update': 0.02421622387847008, 'temp_loss': 1.152721092192053, 'temp': 0.6913270417012667, 'alpha_loss': 1.176706043208203, 'alpha': 1.3395661098218103, 'critic_loss': 191.57496977688973, 'actor_loss': 72.52929207874321, 'time_step': 0.02470301605804622, 'td_error': 57.28966953777256, 'init_value': -99.5125961303711, 'ave_value': -65.86038755719726} step=8208
2022-04-20 17:02.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.17 [info     ] CQL_20220420165828: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003903881151076646, 'time_algorithm_update': 0.024241979359186185, 'temp_loss': 1.1023921722557113, 'temp': 0.6809377408864206, 'alpha_loss': 1.3040206146464624, 'alpha': 1.304250260891273, 'critic_loss': 195.87798563639322, 'actor_loss': 72.92113054286666, 'time_step': 0.024732758427223965, 'td_error': 57.81639894269717, 'init_value': -100.48008728027344, 'ave_value': -66.32422799419041} step=8550
2022-04-20 17:02.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.26 [info     ] CQL_20220420165828: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00038731865018431903, 'time_algorithm_update': 0.02435227444297389, 'temp_loss': 1.0478002774088007, 'temp': 0.670707182577479, 'alpha_loss': 1.4707791942120012, 'alpha': 1.2657318495170415, 'critic_loss': 199.86013825176752, 'actor_loss': 73.24549227708961, 'time_step': 0.024841017193264432, 'td_error': 57.85913448747085, 'init_value': -100.9860610961914, 'ave_value': -66.75981271134103} step=8892
2022-04-20 17:02.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.35 [info     ] CQL_20220420165828: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003856525086520011, 'time_algorithm_update': 0.024310810524120666, 'temp_loss': 1.0128223116983448, 'temp': 0.6604985233984495, 'alpha_loss': 1.6236350720922588, 'alpha': 1.226245107357962, 'critic_loss': 203.86395397520903, 'actor_loss': 73.54788200199953, 'time_step': 0.02479692997291074, 'td_error': 57.672832044362394, 'init_value': -100.67306518554688, 'ave_value': -66.75551846650336} step=9234
2022-04-20 17:02.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.44 [info     ] CQL_20220420165828: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003886425007156461, 'time_algorithm_update': 0.02419200069025943, 'temp_loss': 0.9435103610593673, 'temp': 0.6503857271364558, 'alpha_loss': 1.7713406533819802, 'alpha': 1.1859052881859897, 'critic_loss': 207.52106511523152, 'actor_loss': 73.81533385159676, 'time_step': 0.0246864494524504, 'td_error': 58.3515448082158, 'init_value': -101.36795806884766, 'ave_value': -67.20708031958624} step=9576
2022-04-20 17:02.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:02.53 [info     ] CQL_20220420165828: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003888160861723604, 'time_algorithm_update': 0.024247334017391094, 'temp_loss': 0.8822318310277504, 'temp': 0.640612110408426, 'alpha_loss': 1.8852659064012836, 'alpha': 1.1463333622753968, 'critic_loss': 209.85645664505094, 'actor_loss': 74.04490505463896, 'time_step': 0.02473551000070851, 'td_error': 58.10699759110095, 'init_value': -101.34002685546875, 'ave_value': -67.7185456454935} step=9918
2022-04-20 17:02.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.02 [info     ] CQL_20220420165828: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003865211330659208, 'time_algorithm_update': 0.024347467729222704, 'temp_loss': 0.8369040699207295, 'temp': 0.6310958450997782, 'alpha_loss': 1.9555067017566201, 'alpha': 1.1077501484525134, 'critic_loss': 213.05124128910532, 'actor_loss': 74.242446631716, 'time_step': 0.024833177962498357, 'td_error': 57.70209603963579, 'init_value': -101.6833267211914, 'ave_value': -67.92864098457603} step=10260
2022-04-20 17:03.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.10 [info     ] CQL_20220420165828: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000386354518912689, 'time_algorithm_update': 0.02430177710906804, 'temp_loss': 0.7943570964169084, 'temp': 0.6216357270179437, 'alpha_loss': 2.009430597095113, 'alpha': 1.071183879821621, 'critic_loss': 215.52174640677825, 'actor_loss': 74.46239791557802, 'time_step': 0.02479033302842525, 'td_error': 58.018563491997604, 'init_value': -100.71131896972656, 'ave_value': -67.49180397258283} step=10602
2022-04-20 17:03.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.19 [info     ] CQL_20220420165828: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003903490758081626, 'time_algorithm_update': 0.02431759639093053, 'temp_loss': 0.7392836474535758, 'temp': 0.6124505642910449, 'alpha_loss': 2.034686298021088, 'alpha': 1.0360762623318456, 'critic_loss': 218.30431017959327, 'actor_loss': 74.50285128543251, 'time_step': 0.024806082597252917, 'td_error': 57.92548252457408, 'init_value': -100.72447204589844, 'ave_value': -67.94469793530197} step=10944
2022-04-20 17:03.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.28 [info     ] CQL_20220420165828: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003910796684131288, 'time_algorithm_update': 0.024559776685391253, 'temp_loss': 0.6959307065658402, 'temp': 0.6034753299944582, 'alpha_loss': 2.08916892745799, 'alpha': 1.0022806223721532, 'critic_loss': 220.2995412279988, 'actor_loss': 74.5780457613761, 'time_step': 0.025052228866264833, 'td_error': 57.49754826904174, 'init_value': -100.40535736083984, 'ave_value': -67.92907711853181} step=11286
2022-04-20 17:03.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.38 [info     ] CQL_20220420165828: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003950254261842248, 'time_algorithm_update': 0.025959218454639815, 'temp_loss': 0.6585537549870754, 'temp': 0.5945149485827886, 'alpha_loss': 2.0828254846231373, 'alpha': 0.9700949352387099, 'critic_loss': 222.2137669791952, 'actor_loss': 74.68077804610046, 'time_step': 0.026454871160942212, 'td_error': 57.60843178705838, 'init_value': -100.84230041503906, 'ave_value': -68.0452989730984} step=11628
2022-04-20 17:03.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.47 [info     ] CQL_20220420165828: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00039649009704589844, 'time_algorithm_update': 0.025418236241703144, 'temp_loss': 0.611604727412525, 'temp': 0.5857767094994148, 'alpha_loss': 2.1078768038566698, 'alpha': 0.9388009614763204, 'critic_loss': 223.4267713312517, 'actor_loss': 74.72546452527855, 'time_step': 0.025914855170668216, 'td_error': 56.23225230954892, 'init_value': -100.56989288330078, 'ave_value': -68.13435209037618} step=11970
2022-04-20 17:03.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:03.56 [info     ] CQL_20220420165828: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000390169216178314, 'time_algorithm_update': 0.025634867406030843, 'temp_loss': 0.5853867229313878, 'temp': 0.5772629388940265, 'alpha_loss': 2.107541302229926, 'alpha': 0.9088492091984777, 'critic_loss': 224.31119412427756, 'actor_loss': 74.767190342061, 'time_step': 0.026128662259955155, 'td_error': 56.23554355066572, 'init_value': -100.54318237304688, 'ave_value': -68.14542030405286} step=12312
2022-04-20 17:03.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.06 [info     ] CQL_20220420165828: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00039480025308174, 'time_algorithm_update': 0.02562934822506375, 'temp_loss': 0.5510033002199485, 'temp': 0.5687984064308523, 'alpha_loss': 2.104799496473565, 'alpha': 0.8794940194191291, 'critic_loss': 226.88682127835457, 'actor_loss': 74.78545941804585, 'time_step': 0.026125190550820868, 'td_error': 55.91787719150974, 'init_value': -101.33133697509766, 'ave_value': -68.91493536257126} step=12654
2022-04-20 17:04.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.15 [info     ] CQL_20220420165828: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003909179341723347, 'time_algorithm_update': 0.02580073562979001, 'temp_loss': 0.5214760479087021, 'temp': 0.5604432052687595, 'alpha_loss': 2.0881929992322337, 'alpha': 0.8520617481560735, 'critic_loss': 227.76540794149477, 'actor_loss': 74.80734403509842, 'time_step': 0.026291668763634753, 'td_error': 55.15375821980654, 'init_value': -99.95594787597656, 'ave_value': -67.69737876385182} step=12996
2022-04-20 17:04.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.25 [info     ] CQL_20220420165828: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00039158369365491365, 'time_algorithm_update': 0.02544962383850276, 'temp_loss': 0.47566672174902686, 'temp': 0.5524051677413852, 'alpha_loss': 2.0998722645426877, 'alpha': 0.8249442216597105, 'critic_loss': 229.0130259642127, 'actor_loss': 74.82390640214173, 'time_step': 0.02594355602710568, 'td_error': 55.07727972599664, 'init_value': -99.86692810058594, 'ave_value': -68.06183658339755} step=13338
2022-04-20 17:04.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.34 [info     ] CQL_20220420165828: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003929263667056435, 'time_algorithm_update': 0.025765104600560595, 'temp_loss': 0.4684483338779176, 'temp': 0.5443450885209423, 'alpha_loss': 2.0674156133107275, 'alpha': 0.7992681602985539, 'critic_loss': 230.5050725211874, 'actor_loss': 74.83196773863675, 'time_step': 0.02625986985992967, 'td_error': 54.5742701546684, 'init_value': -100.20917510986328, 'ave_value': -68.40935101144889} step=13680
2022-04-20 17:04.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.43 [info     ] CQL_20220420165828: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003876497870997379, 'time_algorithm_update': 0.025849087893614293, 'temp_loss': 0.4310017567432938, 'temp': 0.5363355876758085, 'alpha_loss': 2.0410483047714707, 'alpha': 0.7743307679717304, 'critic_loss': 230.98339138811792, 'actor_loss': 74.8632670173868, 'time_step': 0.026335013540167557, 'td_error': 54.578365383383435, 'init_value': -99.97227478027344, 'ave_value': -68.56493011382517} step=14022
2022-04-20 17:04.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:04.53 [info     ] CQL_20220420165828: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003942613713225426, 'time_algorithm_update': 0.02543730205959744, 'temp_loss': 0.41561596497501196, 'temp': 0.5286380042109573, 'alpha_loss': 2.0043119521735355, 'alpha': 0.7504810664737434, 'critic_loss': 231.83192403692948, 'actor_loss': 74.89399260805364, 'time_step': 0.02593177800987199, 'td_error': 54.51617241313544, 'init_value': -99.09030151367188, 'ave_value': -68.1116284765142} step=14364
2022-04-20 17:04.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.02 [info     ] CQL_20220420165828: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003904201831036841, 'time_algorithm_update': 0.025652156935797796, 'temp_loss': 0.38242259201163437, 'temp': 0.5209928552309672, 'alpha_loss': 1.9732221740472256, 'alpha': 0.7274252465593884, 'critic_loss': 232.4241402163143, 'actor_loss': 74.89270453425179, 'time_step': 0.026140914325825652, 'td_error': 53.60911025368554, 'init_value': -98.88368225097656, 'ave_value': -67.86656528434843} step=14706
2022-04-20 17:05.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.11 [info     ] CQL_20220420165828: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003897146871912549, 'time_algorithm_update': 0.02545533263892458, 'temp_loss': 0.36136662005855325, 'temp': 0.5136235546298892, 'alpha_loss': 1.9533750553620837, 'alpha': 0.7049770867615416, 'critic_loss': 232.80348009254502, 'actor_loss': 74.85608151084499, 'time_step': 0.025948549571790193, 'td_error': 53.451857491429124, 'init_value': -99.16697692871094, 'ave_value': -68.02984329236924} step=15048
2022-04-20 17:05.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.21 [info     ] CQL_20220420165828: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003866584677445261, 'time_algorithm_update': 0.02573162909836797, 'temp_loss': 0.3484853525803974, 'temp': 0.5062177552465807, 'alpha_loss': 1.879038097761702, 'alpha': 0.6835657072346113, 'critic_loss': 233.38516132733974, 'actor_loss': 74.86765906127573, 'time_step': 0.026220638849581892, 'td_error': 52.706417089173954, 'init_value': -98.46136474609375, 'ave_value': -67.91782781467245} step=15390
2022-04-20 17:05.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.30 [info     ] CQL_20220420165828: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003873555980927763, 'time_algorithm_update': 0.02541014952966344, 'temp_loss': 0.3124995782906026, 'temp': 0.4990323849065959, 'alpha_loss': 1.871439877766789, 'alpha': 0.6629863757836191, 'critic_loss': 233.55593528524477, 'actor_loss': 74.91121615582739, 'time_step': 0.025895713365565964, 'td_error': 52.87809832002867, 'init_value': -98.61783599853516, 'ave_value': -68.13642317483419} step=15732
2022-04-20 17:05.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.39 [info     ] CQL_20220420165828: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003893291741086726, 'time_algorithm_update': 0.025606540211460048, 'temp_loss': 0.3100557550352219, 'temp': 0.4919312939657802, 'alpha_loss': 1.7933125737945588, 'alpha': 0.6431125803300511, 'critic_loss': 232.30222905030723, 'actor_loss': 74.92357063293457, 'time_step': 0.026096291709364505, 'td_error': 52.1030572402347, 'init_value': -98.91560363769531, 'ave_value': -68.81582277967318} step=16074
2022-04-20 17:05.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.49 [info     ] CQL_20220420165828: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003879153937624212, 'time_algorithm_update': 0.025483577572114287, 'temp_loss': 0.2967494083078284, 'temp': 0.48471915477897687, 'alpha_loss': 1.7316074083771622, 'alpha': 0.6244159841049485, 'critic_loss': 232.54877351459703, 'actor_loss': 74.99287758096617, 'time_step': 0.025971768195169012, 'td_error': 51.48801308690176, 'init_value': -97.92353057861328, 'ave_value': -68.0649972727821} step=16416
2022-04-20 17:05.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:05.58 [info     ] CQL_20220420165828: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00038995101437931174, 'time_algorithm_update': 0.025708644013655812, 'temp_loss': 0.26617140335994854, 'temp': 0.4779079356911587, 'alpha_loss': 1.7042166560626866, 'alpha': 0.6056351942277094, 'critic_loss': 232.3992738333362, 'actor_loss': 74.91929749159785, 'time_step': 0.026198721768563252, 'td_error': 51.79241774476793, 'init_value': -98.46914672851562, 'ave_value': -68.23692673038389} step=16758
2022-04-20 17:05.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:06.07 [info     ] CQL_20220420165828: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0004036949391950641, 'time_algorithm_update': 0.025410505763271397, 'temp_loss': 0.2612082705348294, 'temp': 0.47131001766313585, 'alpha_loss': 1.6521357733603808, 'alpha': 0.5876573927221242, 'critic_loss': 230.92428588867188, 'actor_loss': 75.0169261017738, 'time_step': 0.02591510544046324, 'td_error': 51.374445390049786, 'init_value': -97.76048278808594, 'ave_value': -68.31647885573608} step=17100
2022-04-20 17:06.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420165828/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:06.09 [info     ] FQE_20220420170608: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016184887254094504, 'time_algorithm_update': 0.0044960990009537665, 'loss': 0.007779837459752179, 'time_step': 0.00472855711557779, 'init_value': -0.2178240418434143, 'ave_value': -0.186307598837559, 'soft_opc': nan} step=166




2022-04-20 17:06.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.09 [info     ] FQE_20220420170608: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015723130789147802, 'time_algorithm_update': 0.0035375112510589233, 'loss': 0.00627239000652524, 'time_step': 0.0037668139101511024, 'init_value': -0.3635130226612091, 'ave_value': -0.27651680567981424, 'soft_opc': nan} step=332




2022-04-20 17:06.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.10 [info     ] FQE_20220420170608: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015615411551601915, 'time_algorithm_update': 0.004511306084782244, 'loss': 0.005984025502420333, 'time_step': 0.004737091351704425, 'init_value': -0.3892775774002075, 'ave_value': -0.2627563350020094, 'soft_opc': nan} step=498




2022-04-20 17:06.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.11 [info     ] FQE_20220420170608: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017242115664194865, 'time_algorithm_update': 0.0045154223959129975, 'loss': 0.006020265084365944, 'time_step': 0.004761952951730016, 'init_value': -0.4681023955345154, 'ave_value': -0.3040679781743837, 'soft_opc': nan} step=664




2022-04-20 17:06.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.12 [info     ] FQE_20220420170608: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016055480543389377, 'time_algorithm_update': 0.0043014733188123586, 'loss': 0.005725360471272774, 'time_step': 0.0045411285147609485, 'init_value': -0.5205420255661011, 'ave_value': -0.3309623798514876, 'soft_opc': nan} step=830




2022-04-20 17:06.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.13 [info     ] FQE_20220420170608: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016126862491469784, 'time_algorithm_update': 0.004092963345079537, 'loss': 0.005396431774821088, 'time_step': 0.004323995256998453, 'init_value': -0.5330265760421753, 'ave_value': -0.32436109449596956, 'soft_opc': nan} step=996




2022-04-20 17:06.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.13 [info     ] FQE_20220420170608: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016018568751323655, 'time_algorithm_update': 0.004642928939267814, 'loss': 0.005214332215100841, 'time_step': 0.004874012556420751, 'init_value': -0.5460191965103149, 'ave_value': -0.3263025650351837, 'soft_opc': nan} step=1162




2022-04-20 17:06.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.14 [info     ] FQE_20220420170608: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001648707562182323, 'time_algorithm_update': 0.004509706095040563, 'loss': 0.00487781813542123, 'time_step': 0.004746697035180517, 'init_value': -0.630277693271637, 'ave_value': -0.3760323472904159, 'soft_opc': nan} step=1328




2022-04-20 17:06.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.15 [info     ] FQE_20220420170608: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015893470810120366, 'time_algorithm_update': 0.0034483067960624234, 'loss': 0.00453925593573802, 'time_step': 0.0036727333643350258, 'init_value': -0.6544710993766785, 'ave_value': -0.3947151728301636, 'soft_opc': nan} step=1494




2022-04-20 17:06.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.16 [info     ] FQE_20220420170608: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016105749520910792, 'time_algorithm_update': 0.00457071252616055, 'loss': 0.004533550912424563, 'time_step': 0.004800563835235964, 'init_value': -0.7123245000839233, 'ave_value': -0.445244340321284, 'soft_opc': nan} step=1660




2022-04-20 17:06.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.17 [info     ] FQE_20220420170608: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001583946756569736, 'time_algorithm_update': 0.004543396363775414, 'loss': 0.004443709608111306, 'time_step': 0.004772786634514131, 'init_value': -0.7277700901031494, 'ave_value': -0.44338360061734783, 'soft_opc': nan} step=1826




2022-04-20 17:06.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.18 [info     ] FQE_20220420170608: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016119106706366482, 'time_algorithm_update': 0.0043640984110085365, 'loss': 0.004193304993032691, 'time_step': 0.00459752025374447, 'init_value': -0.7710868716239929, 'ave_value': -0.4626244482022148, 'soft_opc': nan} step=1992




2022-04-20 17:06.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.18 [info     ] FQE_20220420170608: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001622222992311041, 'time_algorithm_update': 0.003968899508556688, 'loss': 0.004567587290924177, 'time_step': 0.004204451319683029, 'init_value': -0.805382490158081, 'ave_value': -0.47112667122604074, 'soft_opc': nan} step=2158




2022-04-20 17:06.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.19 [info     ] FQE_20220420170608: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016057922179440418, 'time_algorithm_update': 0.004520021289227957, 'loss': 0.004450558721233743, 'time_step': 0.004753227693488799, 'init_value': -0.9393386244773865, 'ave_value': -0.5863056285659204, 'soft_opc': nan} step=2324




2022-04-20 17:06.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.20 [info     ] FQE_20220420170608: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016052895281688277, 'time_algorithm_update': 0.004565467317420316, 'loss': 0.004647447469543262, 'time_step': 0.004799203700329884, 'init_value': -0.9479660987854004, 'ave_value': -0.5643468286875669, 'soft_opc': nan} step=2490




2022-04-20 17:06.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.21 [info     ] FQE_20220420170608: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015922339565782663, 'time_algorithm_update': 0.0037077694054109506, 'loss': 0.004961149642210319, 'time_step': 0.003939592694661704, 'init_value': -1.044001817703247, 'ave_value': -0.6480166269080327, 'soft_opc': nan} step=2656




2022-04-20 17:06.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.22 [info     ] FQE_20220420170608: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016207292855504048, 'time_algorithm_update': 0.0045079294457493055, 'loss': 0.005309650545423648, 'time_step': 0.004741450390183782, 'init_value': -1.1283278465270996, 'ave_value': -0.7027393427274718, 'soft_opc': nan} step=2822




2022-04-20 17:06.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.23 [info     ] FQE_20220420170608: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016659857278846833, 'time_algorithm_update': 0.004538727093891925, 'loss': 0.0055395343799581635, 'time_step': 0.004775911928659462, 'init_value': -1.2048730850219727, 'ave_value': -0.7591309823523703, 'soft_opc': nan} step=2988




2022-04-20 17:06.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.23 [info     ] FQE_20220420170608: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016022590269525367, 'time_algorithm_update': 0.004288463707429817, 'loss': 0.006059153349817652, 'time_step': 0.004519532962017749, 'init_value': -1.3409814834594727, 'ave_value': -0.861838744733449, 'soft_opc': nan} step=3154




2022-04-20 17:06.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.24 [info     ] FQE_20220420170608: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001603379307023014, 'time_algorithm_update': 0.0024890569319207983, 'loss': 0.0065785905041911424, 'time_step': 0.0027177276381527087, 'init_value': -1.4120309352874756, 'ave_value': -0.9315805337140085, 'soft_opc': nan} step=3320




2022-04-20 17:06.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.24 [info     ] FQE_20220420170608: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016683124634156744, 'time_algorithm_update': 0.0028233011084866813, 'loss': 0.0069577335246771305, 'time_step': 0.003065810146101986, 'init_value': -1.4490578174591064, 'ave_value': -0.929389850780586, 'soft_opc': nan} step=3486




2022-04-20 17:06.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.25 [info     ] FQE_20220420170608: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016328943781105868, 'time_algorithm_update': 0.003446558871901179, 'loss': 0.007266795996952443, 'time_step': 0.0036791462496102573, 'init_value': -1.5566825866699219, 'ave_value': -0.9993065550495442, 'soft_opc': nan} step=3652




2022-04-20 17:06.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.26 [info     ] FQE_20220420170608: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015728732189500188, 'time_algorithm_update': 0.0034337216112987103, 'loss': 0.007932779100072762, 'time_step': 0.0036632914141000034, 'init_value': -1.6373530626296997, 'ave_value': -1.0806167380120772, 'soft_opc': nan} step=3818




2022-04-20 17:06.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.26 [info     ] FQE_20220420170608: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015809449804834574, 'time_algorithm_update': 0.0033828249896865293, 'loss': 0.008588238001425177, 'time_step': 0.0036130339266305946, 'init_value': -1.717696189880371, 'ave_value': -1.1356516577756486, 'soft_opc': nan} step=3984




2022-04-20 17:06.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.27 [info     ] FQE_20220420170608: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015703166823789297, 'time_algorithm_update': 0.003455782511148108, 'loss': 0.009229147443491742, 'time_step': 0.0036854025829269224, 'init_value': -1.8381385803222656, 'ave_value': -1.2289713973087166, 'soft_opc': nan} step=4150




2022-04-20 17:06.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.28 [info     ] FQE_20220420170608: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001644815307065665, 'time_algorithm_update': 0.0034535764211631684, 'loss': 0.01002227040344333, 'time_step': 0.0036920093628297367, 'init_value': -1.9306926727294922, 'ave_value': -1.303719060498013, 'soft_opc': nan} step=4316




2022-04-20 17:06.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.28 [info     ] FQE_20220420170608: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016045426747885095, 'time_algorithm_update': 0.003431703670915351, 'loss': 0.0105970858866966, 'time_step': 0.003663205238709967, 'init_value': -1.9470500946044922, 'ave_value': -1.3194705026929041, 'soft_opc': nan} step=4482




2022-04-20 17:06.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.29 [info     ] FQE_20220420170608: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015892178179269814, 'time_algorithm_update': 0.0034554593534354703, 'loss': 0.011511133727056524, 'time_step': 0.0036874564297227973, 'init_value': -2.051969289779663, 'ave_value': -1.4145350809595003, 'soft_opc': nan} step=4648




2022-04-20 17:06.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.30 [info     ] FQE_20220420170608: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001576765474066677, 'time_algorithm_update': 0.0033768932503390023, 'loss': 0.012156986922491342, 'time_step': 0.0036043919712664134, 'init_value': -2.1423182487487793, 'ave_value': -1.4837935111812643, 'soft_opc': nan} step=4814




2022-04-20 17:06.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.31 [info     ] FQE_20220420170608: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016630126769284168, 'time_algorithm_update': 0.003422778773020549, 'loss': 0.012778379388450903, 'time_step': 0.0036581237632108024, 'init_value': -2.1541595458984375, 'ave_value': -1.487869474704604, 'soft_opc': nan} step=4980




2022-04-20 17:06.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.31 [info     ] FQE_20220420170608: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016246789909270872, 'time_algorithm_update': 0.003430607807205384, 'loss': 0.013769318756411773, 'time_step': 0.0036638357553137354, 'init_value': -2.302704334259033, 'ave_value': -1.6160643403560997, 'soft_opc': nan} step=5146




2022-04-20 17:06.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.32 [info     ] FQE_20220420170608: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016244779150170017, 'time_algorithm_update': 0.003428370119577431, 'loss': 0.013940043074179845, 'time_step': 0.003664206309490893, 'init_value': -2.315281867980957, 'ave_value': -1.6084398189207187, 'soft_opc': nan} step=5312




2022-04-20 17:06.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.33 [info     ] FQE_20220420170608: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016540935240596174, 'time_algorithm_update': 0.0033969950963215656, 'loss': 0.014455767974667027, 'time_step': 0.0036369619599307874, 'init_value': -2.3285508155822754, 'ave_value': -1.6235365115701816, 'soft_opc': nan} step=5478




2022-04-20 17:06.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.33 [info     ] FQE_20220420170608: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.000156717128064259, 'time_algorithm_update': 0.003442680979349527, 'loss': 0.015425925920100277, 'time_step': 0.0036723498838493623, 'init_value': -2.3423924446105957, 'ave_value': -1.6193183135904103, 'soft_opc': nan} step=5644




2022-04-20 17:06.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.34 [info     ] FQE_20220420170608: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015919897929731622, 'time_algorithm_update': 0.0034463922661471078, 'loss': 0.01590396533579936, 'time_step': 0.0036755311919982173, 'init_value': -2.401406764984131, 'ave_value': -1.7104459601357949, 'soft_opc': nan} step=5810




2022-04-20 17:06.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.35 [info     ] FQE_20220420170608: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001589433256402073, 'time_algorithm_update': 0.003464352653687259, 'loss': 0.016268358132465327, 'time_step': 0.003692470401166433, 'init_value': -2.482738971710205, 'ave_value': -1.7871597270573583, 'soft_opc': nan} step=5976




2022-04-20 17:06.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.35 [info     ] FQE_20220420170608: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016158316508833184, 'time_algorithm_update': 0.003516643880361534, 'loss': 0.01728218596734398, 'time_step': 0.0037539665957531296, 'init_value': -2.5855283737182617, 'ave_value': -1.8486630901652346, 'soft_opc': nan} step=6142




2022-04-20 17:06.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.36 [info     ] FQE_20220420170608: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015913147524178746, 'time_algorithm_update': 0.0035420972180653766, 'loss': 0.018119829915834208, 'time_step': 0.0037770515464874634, 'init_value': -2.6417760848999023, 'ave_value': -1.9845727536122542, 'soft_opc': nan} step=6308




2022-04-20 17:06.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.37 [info     ] FQE_20220420170608: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016440540911203408, 'time_algorithm_update': 0.003519622676343803, 'loss': 0.019268839686676996, 'time_step': 0.0037555866930858196, 'init_value': -2.68984317779541, 'ave_value': -1.9958978538711867, 'soft_opc': nan} step=6474




2022-04-20 17:06.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.37 [info     ] FQE_20220420170608: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016129160501870764, 'time_algorithm_update': 0.0034443958696112574, 'loss': 0.019710550342787462, 'time_step': 0.0036807476756084397, 'init_value': -2.8527889251708984, 'ave_value': -2.1304252662905703, 'soft_opc': nan} step=6640




2022-04-20 17:06.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.38 [info     ] FQE_20220420170608: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015972608543304075, 'time_algorithm_update': 0.0033884780952729374, 'loss': 0.020489315384527647, 'time_step': 0.0036204565002257564, 'init_value': -2.89725399017334, 'ave_value': -2.188700486736389, 'soft_opc': nan} step=6806




2022-04-20 17:06.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.39 [info     ] FQE_20220420170608: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001615903463708349, 'time_algorithm_update': 0.0035332067903265894, 'loss': 0.021815935638592947, 'time_step': 0.00376824011285621, 'init_value': -2.9573099613189697, 'ave_value': -2.253656932986803, 'soft_opc': nan} step=6972




2022-04-20 17:06.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.39 [info     ] FQE_20220420170608: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016230272959513837, 'time_algorithm_update': 0.0035273569175995976, 'loss': 0.022643347913174922, 'time_step': 0.0037632950817246035, 'init_value': -3.0615053176879883, 'ave_value': -2.342562292106785, 'soft_opc': nan} step=7138




2022-04-20 17:06.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.40 [info     ] FQE_20220420170608: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016331241791506848, 'time_algorithm_update': 0.00350081202495529, 'loss': 0.02325287877079885, 'time_step': 0.00373687801590885, 'init_value': -3.127471685409546, 'ave_value': -2.4344955328214275, 'soft_opc': nan} step=7304




2022-04-20 17:06.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.41 [info     ] FQE_20220420170608: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016238172370267202, 'time_algorithm_update': 0.003394899598087173, 'loss': 0.024030853833593654, 'time_step': 0.0036276765616543322, 'init_value': -3.067286968231201, 'ave_value': -2.3631022409812825, 'soft_opc': nan} step=7470




2022-04-20 17:06.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.41 [info     ] FQE_20220420170608: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016293037368590572, 'time_algorithm_update': 0.0035067035491207995, 'loss': 0.025073721105126238, 'time_step': 0.003742880131824907, 'init_value': -3.1100692749023438, 'ave_value': -2.469275242714463, 'soft_opc': nan} step=7636




2022-04-20 17:06.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.42 [info     ] FQE_20220420170608: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015839036688747177, 'time_algorithm_update': 0.0034024643610758953, 'loss': 0.026354846268252706, 'time_step': 0.0036311896450548285, 'init_value': -3.1801161766052246, 'ave_value': -2.4621604089264397, 'soft_opc': nan} step=7802




2022-04-20 17:06.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.43 [info     ] FQE_20220420170608: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001594991569059441, 'time_algorithm_update': 0.00340226328516581, 'loss': 0.027748195454478264, 'time_step': 0.003634485853723733, 'init_value': -3.1960296630859375, 'ave_value': -2.505192082086662, 'soft_opc': nan} step=7968




2022-04-20 17:06.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.43 [info     ] FQE_20220420170608: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015803848404482188, 'time_algorithm_update': 0.003448523670794016, 'loss': 0.027647989075518697, 'time_step': 0.0036766299282211855, 'init_value': -3.2255730628967285, 'ave_value': -2.540445972173601, 'soft_opc': nan} step=8134




2022-04-20 17:06.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:06.44 [info     ] FQE_20220420170608: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015770814504968114, 'time_algorithm_update': 0.003355304878878306, 'loss': 0.029096925887840252, 'time_step': 0.003585502325770367, 'init_value': -3.199650287628174, 'ave_value': -2.5359100450669323, 'soft_opc': nan} step=8300




2022-04-20 17:06.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170608/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 17:06.44 [info     ] Directory is created at d3rlpy_logs/FQE_20220420170644
2022-04-20 17:06.44 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:06.44 [debug    ] Building models...
2022-04-20 17:06.44 [debug    ] Models have been built.
2022-04-20 17:06.44 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420170644/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:06.46 [info     ] FQE_20220420170644: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00015981230937259298, 'time_algorithm_update': 0.0034696068562252423, 'loss': 0.022908734970949064, 'time_step': 0.0036989655293209454, 'init_value': -1.2000434398651123, 'ave_value': -1.1670433820904913, 'soft_opc': nan} step=355




2022-04-20 17:06.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.47 [info     ] FQE_20220420170644: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016254304160534495, 'time_algorithm_update': 0.003429106591453015, 'loss': 0.0226907424654969, 'time_step': 0.003666200772137709, 'init_value': -2.4963669776916504, 'ave_value': -2.457938793374458, 'soft_opc': nan} step=710




2022-04-20 17:06.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.49 [info     ] FQE_20220420170644: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016062696215132593, 'time_algorithm_update': 0.0033766954717501787, 'loss': 0.02463064096133474, 'time_step': 0.0036112053293577383, 'init_value': -3.163304090499878, 'ave_value': -3.0848546371189935, 'soft_opc': nan} step=1065




2022-04-20 17:06.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.50 [info     ] FQE_20220420170644: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001608573215108522, 'time_algorithm_update': 0.0033526138520576583, 'loss': 0.030397937200228934, 'time_step': 0.003587771805239395, 'init_value': -4.310821056365967, 'ave_value': -4.261391996355437, 'soft_opc': nan} step=1420




2022-04-20 17:06.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.51 [info     ] FQE_20220420170644: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016258938211790273, 'time_algorithm_update': 0.0034129915103106433, 'loss': 0.03535578996992447, 'time_step': 0.003650907059790383, 'init_value': -4.981225490570068, 'ave_value': -4.911673566365334, 'soft_opc': nan} step=1775




2022-04-20 17:06.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.53 [info     ] FQE_20220420170644: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00015962090290768045, 'time_algorithm_update': 0.003343415596115757, 'loss': 0.0438046070183037, 'time_step': 0.0035766614994532627, 'init_value': -5.981644153594971, 'ave_value': -5.936809525772104, 'soft_opc': nan} step=2130




2022-04-20 17:06.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.54 [info     ] FQE_20220420170644: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00016123341842436454, 'time_algorithm_update': 0.0034204778536944323, 'loss': 0.05139755131020932, 'time_step': 0.0036584162376296354, 'init_value': -6.620979309082031, 'ave_value': -6.591396176646268, 'soft_opc': nan} step=2485




2022-04-20 17:06.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.56 [info     ] FQE_20220420170644: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016343358536841164, 'time_algorithm_update': 0.0033964204116606377, 'loss': 0.06377472022259739, 'time_step': 0.0036351909100169866, 'init_value': -7.51540470123291, 'ave_value': -7.534746040309872, 'soft_opc': nan} step=2840




2022-04-20 17:06.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.57 [info     ] FQE_20220420170644: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001599647629428917, 'time_algorithm_update': 0.0034385761744539505, 'loss': 0.07485452862062925, 'time_step': 0.0036719919930041675, 'init_value': -8.078653335571289, 'ave_value': -8.178984324873797, 'soft_opc': nan} step=3195




2022-04-20 17:06.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:06.58 [info     ] FQE_20220420170644: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016211455976459341, 'time_algorithm_update': 0.0034581540336071606, 'loss': 0.08839711556866975, 'time_step': 0.003694590716294839, 'init_value': -8.834487915039062, 'ave_value': -9.045840407338382, 'soft_opc': nan} step=3550




2022-04-20 17:06.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.00 [info     ] FQE_20220420170644: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.0001579163779675121, 'time_algorithm_update': 0.0029412041247730523, 'loss': 0.10269741533507763, 'time_step': 0.0031724123887612787, 'init_value': -9.766358375549316, 'ave_value': -10.092490555911757, 'soft_opc': nan} step=3905




2022-04-20 17:07.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.01 [info     ] FQE_20220420170644: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.000163101142560932, 'time_algorithm_update': 0.0033943659822705764, 'loss': 0.11941565173402638, 'time_step': 0.0036313782275562553, 'init_value': -9.666104316711426, 'ave_value': -10.094409865977068, 'soft_opc': nan} step=4260




2022-04-20 17:07.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.03 [info     ] FQE_20220420170644: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.0001583723954751458, 'time_algorithm_update': 0.0034921773722474004, 'loss': 0.13164196876730297, 'time_step': 0.003722376218983825, 'init_value': -10.407386779785156, 'ave_value': -10.98794842226625, 'soft_opc': nan} step=4615




2022-04-20 17:07.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.04 [info     ] FQE_20220420170644: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00016042212365378796, 'time_algorithm_update': 0.00342258332480847, 'loss': 0.15207975310811275, 'time_step': 0.0036560145901962065, 'init_value': -10.542482376098633, 'ave_value': -11.304995958225147, 'soft_opc': nan} step=4970




2022-04-20 17:07.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.05 [info     ] FQE_20220420170644: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00015996341973962918, 'time_algorithm_update': 0.0034911014664341026, 'loss': 0.16964190610697572, 'time_step': 0.003724010897354341, 'init_value': -11.030123710632324, 'ave_value': -12.049982993237608, 'soft_opc': nan} step=5325




2022-04-20 17:07.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.07 [info     ] FQE_20220420170644: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016406018969038842, 'time_algorithm_update': 0.0034810704244694238, 'loss': 0.18831121441449078, 'time_step': 0.003716542687214596, 'init_value': -11.171258926391602, 'ave_value': -12.343127100992387, 'soft_opc': nan} step=5680




2022-04-20 17:07.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.08 [info     ] FQE_20220420170644: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016090299042177873, 'time_algorithm_update': 0.0034981895500505473, 'loss': 0.20691235130521612, 'time_step': 0.0037315549984784195, 'init_value': -11.085090637207031, 'ave_value': -12.627181508151414, 'soft_opc': nan} step=6035




2022-04-20 17:07.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.10 [info     ] FQE_20220420170644: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001589331828372579, 'time_algorithm_update': 0.0034598350524902345, 'loss': 0.22137537657375067, 'time_step': 0.003689725634077905, 'init_value': -11.402677536010742, 'ave_value': -13.150515326447161, 'soft_opc': nan} step=6390




2022-04-20 17:07.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.11 [info     ] FQE_20220420170644: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001581514385384573, 'time_algorithm_update': 0.0034045192557321468, 'loss': 0.24274930866881156, 'time_step': 0.0036376671052314865, 'init_value': -11.705245018005371, 'ave_value': -13.686306766070608, 'soft_opc': nan} step=6745




2022-04-20 17:07.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.12 [info     ] FQE_20220420170644: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016495006185182385, 'time_algorithm_update': 0.0035299294431444623, 'loss': 0.2594095465215579, 'time_step': 0.0037694769845881934, 'init_value': -11.580267906188965, 'ave_value': -13.76037407550419, 'soft_opc': nan} step=7100




2022-04-20 17:07.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.14 [info     ] FQE_20220420170644: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001612488652618838, 'time_algorithm_update': 0.0034289272738174652, 'loss': 0.2794997602386374, 'time_step': 0.0036663129296101316, 'init_value': -11.44876766204834, 'ave_value': -13.819103903291769, 'soft_opc': nan} step=7455




2022-04-20 17:07.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.15 [info     ] FQE_20220420170644: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016317166073221556, 'time_algorithm_update': 0.0034256780651253714, 'loss': 0.2909115352156297, 'time_step': 0.0036622779470094494, 'init_value': -11.824960708618164, 'ave_value': -14.56383092424124, 'soft_opc': nan} step=7810




2022-04-20 17:07.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.17 [info     ] FQE_20220420170644: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00016427375900913293, 'time_algorithm_update': 0.003482147001884353, 'loss': 0.3115553237182993, 'time_step': 0.003721801327987456, 'init_value': -12.156582832336426, 'ave_value': -14.98172479809299, 'soft_opc': nan} step=8165




2022-04-20 17:07.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.18 [info     ] FQE_20220420170644: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.000164130707861672, 'time_algorithm_update': 0.0034809858026638835, 'loss': 0.33185225013695974, 'time_step': 0.0037177173184676907, 'init_value': -12.640230178833008, 'ave_value': -15.621149436036841, 'soft_opc': nan} step=8520




2022-04-20 17:07.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.20 [info     ] FQE_20220420170644: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016023138879050672, 'time_algorithm_update': 0.003454397094081825, 'loss': 0.35623895071027145, 'time_step': 0.003686332702636719, 'init_value': -12.687771797180176, 'ave_value': -15.7985434142947, 'soft_opc': nan} step=8875




2022-04-20 17:07.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.21 [info     ] FQE_20220420170644: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016433487475757869, 'time_algorithm_update': 0.003483733996539049, 'loss': 0.36425228915898733, 'time_step': 0.0037232439282914283, 'init_value': -12.761731147766113, 'ave_value': -16.107588428614644, 'soft_opc': nan} step=9230




2022-04-20 17:07.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.22 [info     ] FQE_20220420170644: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00016250677511725627, 'time_algorithm_update': 0.0034142299437187086, 'loss': 0.3702944088150078, 'time_step': 0.0036506558807802873, 'init_value': -12.924777030944824, 'ave_value': -16.298531100799735, 'soft_opc': nan} step=9585




2022-04-20 17:07.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.24 [info     ] FQE_20220420170644: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00016342686935209893, 'time_algorithm_update': 0.0035365413612043354, 'loss': 0.3811049154474282, 'time_step': 0.003776643645595497, 'init_value': -12.821751594543457, 'ave_value': -16.2483348500813, 'soft_opc': nan} step=9940




2022-04-20 17:07.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.25 [info     ] FQE_20220420170644: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016253162437761333, 'time_algorithm_update': 0.003483695715246066, 'loss': 0.3924219727621112, 'time_step': 0.0037208570560938875, 'init_value': -13.343826293945312, 'ave_value': -16.739985324852313, 'soft_opc': nan} step=10295




2022-04-20 17:07.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.27 [info     ] FQE_20220420170644: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00016072971720091053, 'time_algorithm_update': 0.003491647478560327, 'loss': 0.41495555770439163, 'time_step': 0.003725075385939907, 'init_value': -13.484827995300293, 'ave_value': -16.98103074248607, 'soft_opc': nan} step=10650




2022-04-20 17:07.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.28 [info     ] FQE_20220420170644: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016009639686262103, 'time_algorithm_update': 0.003513444981104891, 'loss': 0.41585041243110743, 'time_step': 0.0037489300042810574, 'init_value': -13.48488712310791, 'ave_value': -16.98410343622149, 'soft_opc': nan} step=11005




2022-04-20 17:07.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.30 [info     ] FQE_20220420170644: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016300846153581645, 'time_algorithm_update': 0.003463349543826681, 'loss': 0.43290367320075, 'time_step': 0.003701801703009807, 'init_value': -13.553712844848633, 'ave_value': -17.01435285636357, 'soft_opc': nan} step=11360




2022-04-20 17:07.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.31 [info     ] FQE_20220420170644: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.0001643268155380034, 'time_algorithm_update': 0.0035217983621946523, 'loss': 0.436766408069033, 'time_step': 0.0037613203827763946, 'init_value': -13.447892189025879, 'ave_value': -16.890596094988336, 'soft_opc': nan} step=11715




2022-04-20 17:07.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.32 [info     ] FQE_20220420170644: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00016252826636945698, 'time_algorithm_update': 0.0034873183344451475, 'loss': 0.4528171157962839, 'time_step': 0.003722943050760618, 'init_value': -13.1395845413208, 'ave_value': -16.710234586532053, 'soft_opc': nan} step=12070




2022-04-20 17:07.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.34 [info     ] FQE_20220420170644: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016211455976459341, 'time_algorithm_update': 0.003466693448348784, 'loss': 0.46998288470359756, 'time_step': 0.0037040455240598866, 'init_value': -14.377738952636719, 'ave_value': -17.628090653245838, 'soft_opc': nan} step=12425




2022-04-20 17:07.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.35 [info     ] FQE_20220420170644: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00016197419502365757, 'time_algorithm_update': 0.003462305203290053, 'loss': 0.4909334389938855, 'time_step': 0.0036987190515222683, 'init_value': -14.629305839538574, 'ave_value': -17.804212304993868, 'soft_opc': nan} step=12780




2022-04-20 17:07.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.37 [info     ] FQE_20220420170644: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016471432967924736, 'time_algorithm_update': 0.0034537120604179274, 'loss': 0.5061859454697286, 'time_step': 0.0036931796812675367, 'init_value': -14.790865898132324, 'ave_value': -17.927756914881897, 'soft_opc': nan} step=13135




2022-04-20 17:07.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.38 [info     ] FQE_20220420170644: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016271161361479424, 'time_algorithm_update': 0.0034539988343144806, 'loss': 0.5146693228416039, 'time_step': 0.003691335463188064, 'init_value': -14.855637550354004, 'ave_value': -17.954342061791333, 'soft_opc': nan} step=13490




2022-04-20 17:07.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.39 [info     ] FQE_20220420170644: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016281705507090394, 'time_algorithm_update': 0.003464878780741087, 'loss': 0.5376011111419385, 'time_step': 0.0037008896679945396, 'init_value': -15.484079360961914, 'ave_value': -18.478402604289265, 'soft_opc': nan} step=13845




2022-04-20 17:07.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.41 [info     ] FQE_20220420170644: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016556995015748789, 'time_algorithm_update': 0.0035425790598694706, 'loss': 0.5614623947672441, 'time_step': 0.003784174986288581, 'init_value': -15.684405326843262, 'ave_value': -18.62386827250053, 'soft_opc': nan} step=14200




2022-04-20 17:07.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.42 [info     ] FQE_20220420170644: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016402862441371863, 'time_algorithm_update': 0.003491791201309419, 'loss': 0.5661755877114097, 'time_step': 0.003731473063079404, 'init_value': -15.935098648071289, 'ave_value': -18.821727817770252, 'soft_opc': nan} step=14555




2022-04-20 17:07.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.44 [info     ] FQE_20220420170644: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00015993789887764086, 'time_algorithm_update': 0.0034777876356957664, 'loss': 0.574140507045766, 'time_step': 0.0037124600208980935, 'init_value': -16.322223663330078, 'ave_value': -19.153116391688833, 'soft_opc': nan} step=14910




2022-04-20 17:07.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.45 [info     ] FQE_20220420170644: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016162429057376486, 'time_algorithm_update': 0.003479028755510357, 'loss': 0.590850227410105, 'time_step': 0.0037142988661645164, 'init_value': -16.839624404907227, 'ave_value': -19.54334916007517, 'soft_opc': nan} step=15265




2022-04-20 17:07.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.47 [info     ] FQE_20220420170644: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001634107509129484, 'time_algorithm_update': 0.003443053742529641, 'loss': 0.6136656233542402, 'time_step': 0.0036801573256371725, 'init_value': -17.448434829711914, 'ave_value': -19.957282253348076, 'soft_opc': nan} step=15620




2022-04-20 17:07.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.48 [info     ] FQE_20220420170644: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016584262041978434, 'time_algorithm_update': 0.0034753362897416234, 'loss': 0.6172049715223027, 'time_step': 0.0037164298581405424, 'init_value': -17.033092498779297, 'ave_value': -19.95146096529801, 'soft_opc': nan} step=15975




2022-04-20 17:07.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.49 [info     ] FQE_20220420170644: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.0001618606943479726, 'time_algorithm_update': 0.0034673804968175753, 'loss': 0.6126461207184574, 'time_step': 0.0037023953988518515, 'init_value': -16.479900360107422, 'ave_value': -19.41974502646459, 'soft_opc': nan} step=16330




2022-04-20 17:07.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.51 [info     ] FQE_20220420170644: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016441613855496258, 'time_algorithm_update': 0.003436830010212643, 'loss': 0.6348875509706181, 'time_step': 0.003676208308045293, 'init_value': -17.04293441772461, 'ave_value': -19.858158551541994, 'soft_opc': nan} step=16685




2022-04-20 17:07.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.52 [info     ] FQE_20220420170644: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.0001634631358401876, 'time_algorithm_update': 0.0034691192734409384, 'loss': 0.6306743316562243, 'time_step': 0.0037058037771305567, 'init_value': -17.244792938232422, 'ave_value': -19.997468724392327, 'soft_opc': nan} step=17040




2022-04-20 17:07.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.54 [info     ] FQE_20220420170644: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016313673744738943, 'time_algorithm_update': 0.0034730172493088415, 'loss': 0.6447101175470251, 'time_step': 0.0037094357987524757, 'init_value': -17.237882614135742, 'ave_value': -20.104278878299002, 'soft_opc': nan} step=17395




2022-04-20 17:07.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:07.55 [info     ] FQE_20220420170644: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.000162659228687555, 'time_algorithm_update': 0.0034354727033158424, 'loss': 0.6387337003900132, 'time_step': 0.0036739322501169124, 'init_value': -17.068634033203125, 'ave_value': -19.970111272223246, 'soft_opc': nan} step=17750




2022-04-20 17:07.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420170644/model_17750.pt
search iteration:  11
using hyper params:  [0.00459858087145252, 0.00387744425285509, 1.2844250312785716e-05, 1]
2022-04-20 17:07.55 [debug    ] RoundIterator is selected.
2022-04-20 17:07.55 [info     ] Directory is created at d3rlpy_logs/CQL_20220420170755
2022-04-20 17:07.55 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:07.55 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:07.55 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420170755/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00459858087145252, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weig

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.03 [info     ] CQL_20220420170755: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00029153084894369916, 'time_algorithm_update': 0.0227235241940147, 'temp_loss': 4.712239753955986, 'temp': 0.9978167573262376, 'alpha_loss': -13.280734919665152, 'alpha': 1.015950884735375, 'critic_loss': 18.127817393743502, 'actor_loss': -1.2788306796237041, 'time_step': 0.023106127454523454, 'td_error': 4.188347784694726, 'init_value': -1.5721323490142822, 'ave_value': 0.18459284560771677} step=342
2022-04-20 17:08.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.12 [info     ] CQL_20220420170755: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0002961402748063294, 'time_algorithm_update': 0.02340091878210592, 'temp_loss': 3.7874309096420022, 'temp': 0.9937937522840778, 'alpha_loss': -5.5893186205311824, 'alpha': 1.03948912564774, 'critic_loss': 21.921651868095175, 'actor_loss': 0.1622278947622804, 'time_step': 0.023791865298622532, 'td_error': 4.299166715242889, 'init_value': -2.657963275909424, 'ave_value': 0.44093942569317046} step=684
2022-04-20 17:08.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.21 [info     ] CQL_20220420170755: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003056010307624326, 'time_algorithm_update': 0.02430285766110783, 'temp_loss': 3.0850522553014477, 'temp': 0.9902331473534567, 'alpha_loss': -1.250834697808482, 'alpha': 1.0514477386809231, 'critic_loss': 37.662681384393345, 'actor_loss': 1.4840966460692604, 'time_step': 0.02470379196412382, 'td_error': 5.325000429021295, 'init_value': -5.392075538635254, 'ave_value': -0.2806723335230941} step=1026
2022-04-20 17:08.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.30 [info     ] CQL_20220420170755: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00030760458338330364, 'time_algorithm_update': 0.025014108384561817, 'temp_loss': 2.579684707504964, 'temp': 0.9869446308292144, 'alpha_loss': 1.897021930573892, 'alpha': 1.0490431897124353, 'critic_loss': 59.1012421323542, 'actor_loss': 2.795801562175416, 'time_step': 0.025419225469667312, 'td_error': 6.711132167017082, 'init_value': -7.233445644378662, 'ave_value': -0.1617349961775917} step=1368
2022-04-20 17:08.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.39 [info     ] CQL_20220420170755: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003033039862649483, 'time_algorithm_update': 0.025506573810912016, 'temp_loss': 2.182871660991022, 'temp': 0.9838472634379627, 'alpha_loss': 4.198144544286338, 'alpha': 1.0304386797007064, 'critic_loss': 84.50366155167072, 'actor_loss': 4.031762981623934, 'time_step': 0.02590730036908423, 'td_error': 9.1970605521164, 'init_value': -10.465431213378906, 'ave_value': -1.0401171073424924} step=1710
2022-04-20 17:08.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.48 [info     ] CQL_20220420170755: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0002894666459825304, 'time_algorithm_update': 0.024072315957811143, 'temp_loss': 1.8847968058976514, 'temp': 0.980896900968942, 'alpha_loss': 6.03072282445361, 'alpha': 0.997789845654839, 'critic_loss': 113.50952050281548, 'actor_loss': 5.406468923328913, 'time_step': 0.02444999538667021, 'td_error': 11.31540108212343, 'init_value': -14.832101821899414, 'ave_value': -3.037676602373789} step=2052
2022-04-20 17:08.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:08.57 [info     ] CQL_20220420170755: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00030583666082014115, 'time_algorithm_update': 0.025482219562195894, 'temp_loss': 1.6505399224353812, 'temp': 0.9780293788477691, 'alpha_loss': 7.442149539439999, 'alpha': 0.9583350254429711, 'critic_loss': 146.09032069869906, 'actor_loss': 7.0262567850581386, 'time_step': 0.025885467640837732, 'td_error': 13.681076764070392, 'init_value': -16.928272247314453, 'ave_value': -2.84719772406258} step=2394
2022-04-20 17:08.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.07 [info     ] CQL_20220420170755: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003067394446211252, 'time_algorithm_update': 0.025487105051676433, 'temp_loss': 1.4392383288570314, 'temp': 0.9752650630404378, 'alpha_loss': 8.567808644813404, 'alpha': 0.9178801341014996, 'critic_loss': 183.3077292191355, 'actor_loss': 8.87305097830923, 'time_step': 0.025890340581972, 'td_error': 19.597641926936223, 'init_value': -21.557964324951172, 'ave_value': -5.441974750993488} step=2736
2022-04-20 17:09.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.16 [info     ] CQL_20220420170755: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00030140430606596653, 'time_algorithm_update': 0.025241521366855556, 'temp_loss': 1.2345861201746422, 'temp': 0.9726148810651567, 'alpha_loss': 9.433903961850886, 'alpha': 0.8792258914102588, 'critic_loss': 225.4326153136136, 'actor_loss': 10.839151272299693, 'time_step': 0.02563639900140595, 'td_error': 21.800293052101708, 'init_value': -24.694143295288086, 'ave_value': -6.02700646164181} step=3078
2022-04-20 17:09.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.25 [info     ] CQL_20220420170755: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003039049126251399, 'time_algorithm_update': 0.02555411670640198, 'temp_loss': 1.0299801103204314, 'temp': 0.9700522903810468, 'alpha_loss': 10.19516498721831, 'alpha': 0.8429126924241496, 'critic_loss': 271.05110690468234, 'actor_loss': 12.933801529700297, 'time_step': 0.025955277576781156, 'td_error': 25.50060742114883, 'init_value': -30.00371742248535, 'ave_value': -8.496055482432649} step=3420
2022-04-20 17:09.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.34 [info     ] CQL_20220420170755: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00030495688232064943, 'time_algorithm_update': 0.02515537766685263, 'temp_loss': 0.8543082279158615, 'temp': 0.9677273631095886, 'alpha_loss': 10.875265436562879, 'alpha': 0.8088880909813775, 'critic_loss': 320.1859750134206, 'actor_loss': 15.278468118076436, 'time_step': 0.025558108474776062, 'td_error': 34.76717084848102, 'init_value': -34.8818244934082, 'ave_value': -10.850641914579246} step=3762
2022-04-20 17:09.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.44 [info     ] CQL_20220420170755: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000309129904585275, 'time_algorithm_update': 0.025651453531276413, 'temp_loss': 0.7408852028872883, 'temp': 0.9654778290910331, 'alpha_loss': 11.223405732048882, 'alpha': 0.7771053678459592, 'critic_loss': 372.5351360834133, 'actor_loss': 17.60812695263422, 'time_step': 0.02605969306321172, 'td_error': 36.84531347011188, 'init_value': -39.10350799560547, 'ave_value': -12.124627214842015} step=4104
2022-04-20 17:09.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:09.53 [info     ] CQL_20220420170755: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003085847486529434, 'time_algorithm_update': 0.0252514624456216, 'temp_loss': 0.5998572171910813, 'temp': 0.9633523185699306, 'alpha_loss': 11.935923699049921, 'alpha': 0.7470171059432783, 'critic_loss': 428.280381230583, 'actor_loss': 20.36210982562506, 'time_step': 0.025657321974547984, 'td_error': 36.35900150655816, 'init_value': -45.94733428955078, 'ave_value': -15.604959661466582} step=4446
2022-04-20 17:09.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.02 [info     ] CQL_20220420170755: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003083546956380208, 'time_algorithm_update': 0.025565474354035674, 'temp_loss': 0.4782617485575509, 'temp': 0.9613941439062531, 'alpha_loss': 11.956217870377658, 'alpha': 0.7192670146972813, 'critic_loss': 486.3536255596674, 'actor_loss': 23.013477674004626, 'time_step': 0.025970820097895395, 'td_error': 62.266506072288095, 'init_value': -49.86008071899414, 'ave_value': -17.07281574795375} step=4788
2022-04-20 17:10.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.11 [info     ] CQL_20220420170755: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00030595308158829897, 'time_algorithm_update': 0.02519468745292976, 'temp_loss': 0.384609119618061, 'temp': 0.9597275919384427, 'alpha_loss': 11.784712378741705, 'alpha': 0.6926648412880144, 'critic_loss': 549.1389600965712, 'actor_loss': 25.795197419952927, 'time_step': 0.02559901747787208, 'td_error': 66.7700861511628, 'init_value': -57.0338020324707, 'ave_value': -20.48452275385191} step=5130
2022-04-20 17:10.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.21 [info     ] CQL_20220420170755: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003119233058907135, 'time_algorithm_update': 0.025527463321797332, 'temp_loss': 0.3317887176492671, 'temp': 0.9579567466562952, 'alpha_loss': 11.475159236562183, 'alpha': 0.6692928110646923, 'critic_loss': 606.9767895971822, 'actor_loss': 28.4103813729091, 'time_step': 0.02593686984993561, 'td_error': 61.56355071249261, 'init_value': -60.687522888183594, 'ave_value': -20.57646028693732} step=5472
2022-04-20 17:10.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.30 [info     ] CQL_20220420170755: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003067993978310747, 'time_algorithm_update': 0.02546076816424989, 'temp_loss': 0.3003616480821232, 'temp': 0.9563097978195949, 'alpha_loss': 11.092269526587593, 'alpha': 0.6470936609987628, 'critic_loss': 666.8271653917101, 'actor_loss': 31.117576058147943, 'time_step': 0.025866533580579255, 'td_error': 66.14504968611173, 'init_value': -69.82063293457031, 'ave_value': -25.320231836325412} step=5814
2022-04-20 17:10.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.39 [info     ] CQL_20220420170755: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00031240014304891663, 'time_algorithm_update': 0.02553320140169378, 'temp_loss': 0.21068392893332138, 'temp': 0.9548070469097785, 'alpha_loss': 10.767985486147696, 'alpha': 0.6260887604010733, 'critic_loss': 722.2498839975101, 'actor_loss': 33.54163756008037, 'time_step': 0.025942955797875835, 'td_error': 49.79105999523452, 'init_value': -72.30335998535156, 'ave_value': -25.93827491528279} step=6156
2022-04-20 17:10.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.48 [info     ] CQL_20220420170755: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00030417749059130574, 'time_algorithm_update': 0.0256234511994479, 'temp_loss': 0.14659411706335362, 'temp': 0.9535885359111586, 'alpha_loss': 10.406554422880474, 'alpha': 0.6061207910030209, 'critic_loss': 777.3591306809096, 'actor_loss': 35.963754319308094, 'time_step': 0.026027415928087737, 'td_error': 70.17995520024499, 'init_value': -79.40716552734375, 'ave_value': -28.08010300250204} step=6498
2022-04-20 17:10.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:10.58 [info     ] CQL_20220420170755: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00030828986251563357, 'time_algorithm_update': 0.025365104452211257, 'temp_loss': 0.06848282432891646, 'temp': 0.9528691472365842, 'alpha_loss': 10.169598427432323, 'alpha': 0.58694288343714, 'critic_loss': 829.8582431726288, 'actor_loss': 38.3421554732741, 'time_step': 0.025770358174865008, 'td_error': 58.79309223899469, 'init_value': -84.33976745605469, 'ave_value': -30.955262333358732} step=6840
2022-04-20 17:10.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.07 [info     ] CQL_20220420170755: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00030808560332359626, 'time_algorithm_update': 0.02543950499149791, 'temp_loss': 0.041271316944158566, 'temp': 0.9523180245656019, 'alpha_loss': 9.986408987937615, 'alpha': 0.5686328791038334, 'critic_loss': 883.582195616605, 'actor_loss': 40.82623577675624, 'time_step': 0.025847024387783475, 'td_error': 63.693158957077095, 'init_value': -92.39689636230469, 'ave_value': -34.34437592085954} step=7182
2022-04-20 17:11.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.16 [info     ] CQL_20220420170755: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00030590149394252845, 'time_algorithm_update': 0.02522266329380504, 'temp_loss': -0.019582629764768465, 'temp': 0.9522083560277147, 'alpha_loss': 9.90292239328574, 'alpha': 0.5506094342086747, 'critic_loss': 938.1298796001233, 'actor_loss': 43.37486480132878, 'time_step': 0.025624887985095643, 'td_error': 86.0012072086803, 'init_value': -95.6567153930664, 'ave_value': -35.351103150071324} step=7524
2022-04-20 17:11.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.25 [info     ] CQL_20220420170755: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003075083793952451, 'time_algorithm_update': 0.02419623227147331, 'temp_loss': -0.03829794066647688, 'temp': 0.9525653228425143, 'alpha_loss': 9.699705142026756, 'alpha': 0.5331687697193079, 'critic_loss': 987.6927766855697, 'actor_loss': 45.5012914423357, 'time_step': 0.024600373374091253, 'td_error': 74.8580391281909, 'init_value': -100.32218170166016, 'ave_value': -37.82601677689466} step=7866
2022-04-20 17:11.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.34 [info     ] CQL_20220420170755: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.000307154237178334, 'time_algorithm_update': 0.02396559645557961, 'temp_loss': -0.11336497849191142, 'temp': 0.9533766961237143, 'alpha_loss': 9.365550547315364, 'alpha': 0.5163560051318498, 'critic_loss': 1038.320110834133, 'actor_loss': 47.73447873299582, 'time_step': 0.024375200968736795, 'td_error': 73.13354570172159, 'init_value': -105.94927978515625, 'ave_value': -39.52218980535731} step=8208
2022-04-20 17:11.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.42 [info     ] CQL_20220420170755: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00030589033985695645, 'time_algorithm_update': 0.023896782718903838, 'temp_loss': -0.12655817228233257, 'temp': 0.9546937407457341, 'alpha_loss': 9.09400875108284, 'alpha': 0.5003342521295213, 'critic_loss': 1086.4085384614286, 'actor_loss': 49.945186754416305, 'time_step': 0.024299862092001398, 'td_error': 84.26199299190912, 'init_value': -113.01004791259766, 'ave_value': -42.80557854474128} step=8550
2022-04-20 17:11.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:11.51 [info     ] CQL_20220420170755: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.000304342710483841, 'time_algorithm_update': 0.02383058391816435, 'temp_loss': -0.14300760551633543, 'temp': 0.9564105350720254, 'alpha_loss': 9.10973991288079, 'alpha': 0.48466537535539145, 'critic_loss': 1135.5739737170481, 'actor_loss': 52.10091344376057, 'time_step': 0.02423021249603807, 'td_error': 80.04927534743251, 'init_value': -117.84989929199219, 'ave_value': -44.47531532181276} step=8892
2022-04-20 17:11.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.00 [info     ] CQL_20220420170755: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00030657910464102763, 'time_algorithm_update': 0.02395574879228023, 'temp_loss': -0.133754377111749, 'temp': 0.9579782423220182, 'alpha_loss': 8.829313749458358, 'alpha': 0.4694773683771055, 'critic_loss': 1187.5665111876372, 'actor_loss': 54.498888919228, 'time_step': 0.02435505947871515, 'td_error': 96.20895651968554, 'init_value': -120.7271499633789, 'ave_value': -45.25231951149734} step=9234
2022-04-20 17:12.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.08 [info     ] CQL_20220420170755: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00030570420605397365, 'time_algorithm_update': 0.023878448190744857, 'temp_loss': -0.1822636079816529, 'temp': 0.9600667465499967, 'alpha_loss': 8.64420297131901, 'alpha': 0.45468540611671426, 'critic_loss': 1240.8682361625092, 'actor_loss': 56.8987602657742, 'time_step': 0.02428317209433394, 'td_error': 91.73437913377118, 'init_value': -129.05577087402344, 'ave_value': -49.41652996298429} step=9576
2022-04-20 17:12.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.17 [info     ] CQL_20220420170755: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00030294844978734066, 'time_algorithm_update': 0.02382268333992763, 'temp_loss': -0.1693487198021241, 'temp': 0.9621529570448468, 'alpha_loss': 8.537055152201512, 'alpha': 0.44043117822610844, 'critic_loss': 1291.4654551723547, 'actor_loss': 59.091202708015665, 'time_step': 0.02422284103973567, 'td_error': 94.84977760671399, 'init_value': -133.7958984375, 'ave_value': -49.08809813976288} step=9918
2022-04-20 17:12.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.26 [info     ] CQL_20220420170755: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003074540032280816, 'time_algorithm_update': 0.023900117093359517, 'temp_loss': -0.2257295093711531, 'temp': 0.9645683934465487, 'alpha_loss': 8.24876154375355, 'alpha': 0.4266746377038677, 'critic_loss': 1341.2878489354898, 'actor_loss': 61.23701107850549, 'time_step': 0.02430574866066202, 'td_error': 91.88478337000976, 'init_value': -136.67291259765625, 'ave_value': -51.1268784914253} step=10260
2022-04-20 17:12.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.35 [info     ] CQL_20220420170755: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003067770896599307, 'time_algorithm_update': 0.02394219936683164, 'temp_loss': -0.21407843812515862, 'temp': 0.9672751214072021, 'alpha_loss': 8.16187223774648, 'alpha': 0.41331366732803704, 'critic_loss': 1393.3534331739995, 'actor_loss': 63.569227776332205, 'time_step': 0.02434693930441873, 'td_error': 108.54255992850713, 'init_value': -144.54421997070312, 'ave_value': -54.81269279388694} step=10602
2022-04-20 17:12.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.43 [info     ] CQL_20220420170755: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00029923762494360494, 'time_algorithm_update': 0.023910162044547455, 'temp_loss': -0.2116351687943029, 'temp': 0.9697995614587215, 'alpha_loss': 7.93311463740834, 'alpha': 0.400294711802438, 'critic_loss': 1445.9822601853755, 'actor_loss': 66.01142626756813, 'time_step': 0.02430672046036748, 'td_error': 93.45227814780978, 'init_value': -150.23049926757812, 'ave_value': -56.38325298970884} step=10944
2022-04-20 17:12.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:12.52 [info     ] CQL_20220420170755: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003069325497275905, 'time_algorithm_update': 0.023957030814990662, 'temp_loss': -0.22205973274962246, 'temp': 0.9721247335972144, 'alpha_loss': 7.814359165771663, 'alpha': 0.3878515336597175, 'critic_loss': 1499.1218529416803, 'actor_loss': 68.25681574860512, 'time_step': 0.02435955248380962, 'td_error': 110.08489730165194, 'init_value': -157.68406677246094, 'ave_value': -59.219225437952595} step=11286
2022-04-20 17:12.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.01 [info     ] CQL_20220420170755: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00030427299744901603, 'time_algorithm_update': 0.023878705431843363, 'temp_loss': -0.22799753365025185, 'temp': 0.9749693621320334, 'alpha_loss': 7.5044323254747, 'alpha': 0.37571402495367484, 'critic_loss': 1550.8686298571135, 'actor_loss': 70.56039646215606, 'time_step': 0.024280029430724027, 'td_error': 118.97013129752621, 'init_value': -162.7728271484375, 'ave_value': -61.77767589012782} step=11628
2022-04-20 17:13.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.09 [info     ] CQL_20220420170755: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00031069008230465893, 'time_algorithm_update': 0.02399639358297426, 'temp_loss': -0.2199189339857004, 'temp': 0.977388585694352, 'alpha_loss': 7.3282089623791435, 'alpha': 0.3642069792364076, 'critic_loss': 1604.6908658568623, 'actor_loss': 72.81899745561923, 'time_step': 0.024405949297007064, 'td_error': 128.64410334213804, 'init_value': -165.54547119140625, 'ave_value': -61.55615082325162} step=11970
2022-04-20 17:13.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.18 [info     ] CQL_20220420170755: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003099615810907375, 'time_algorithm_update': 0.02393364139467652, 'temp_loss': -0.25957896302274447, 'temp': 0.9802038589416192, 'alpha_loss': 7.222484719683552, 'alpha': 0.35289277667887725, 'critic_loss': 1657.1423207779376, 'actor_loss': 75.12764986495526, 'time_step': 0.024341141968442684, 'td_error': 123.52867312261046, 'init_value': -171.328125, 'ave_value': -64.37414273629318} step=12312
2022-04-20 17:13.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.27 [info     ] CQL_20220420170755: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003024876466271473, 'time_algorithm_update': 0.023945695475528116, 'temp_loss': -0.22943627168778438, 'temp': 0.9828641249073876, 'alpha_loss': 6.990660458280329, 'alpha': 0.3419438044578708, 'critic_loss': 1709.9470803779468, 'actor_loss': 77.36068096495511, 'time_step': 0.024347812808745088, 'td_error': 127.79105482046924, 'init_value': -176.1519775390625, 'ave_value': -66.42279932143452} step=12654
2022-04-20 17:13.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.36 [info     ] CQL_20220420170755: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00030488926067686917, 'time_algorithm_update': 0.023963580354612473, 'temp_loss': -0.24591916979274206, 'temp': 0.9855405303121311, 'alpha_loss': 6.826196694234659, 'alpha': 0.33130872519741283, 'critic_loss': 1764.2911234180829, 'actor_loss': 79.77150900322094, 'time_step': 0.024366544701202572, 'td_error': 143.18132336539637, 'init_value': -180.92721557617188, 'ave_value': -68.56870868327381} step=12996
2022-04-20 17:13.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.44 [info     ] CQL_20220420170755: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.000306184528863918, 'time_algorithm_update': 0.02391477774458322, 'temp_loss': -0.19771148490430843, 'temp': 0.9881175906337493, 'alpha_loss': 6.642866281040928, 'alpha': 0.3211365597464188, 'critic_loss': 1818.4077494660316, 'actor_loss': 82.08705560784591, 'time_step': 0.02432015485930861, 'td_error': 147.43319534687137, 'init_value': -187.63214111328125, 'ave_value': -71.308197956053} step=13338
2022-04-20 17:13.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:13.53 [info     ] CQL_20220420170755: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003055940594589501, 'time_algorithm_update': 0.025243075270401805, 'temp_loss': -0.2371310560653607, 'temp': 0.9904799951098816, 'alpha_loss': 6.665992760518838, 'alpha': 0.31110348234399715, 'critic_loss': 1874.9388798942343, 'actor_loss': 84.65639809279413, 'time_step': 0.025646395850599857, 'td_error': 157.94525271867877, 'init_value': -192.6770782470703, 'ave_value': -73.0259130145408} step=13680
2022-04-20 17:13.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.03 [info     ] CQL_20220420170755: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003092156516181098, 'time_algorithm_update': 0.0255877741596155, 'temp_loss': -0.19168953423263035, 'temp': 0.9925585749902224, 'alpha_loss': 6.468050325125978, 'alpha': 0.3012907990412405, 'critic_loss': 1930.3756456877056, 'actor_loss': 86.89214724267435, 'time_step': 0.025994245071857297, 'td_error': 150.1881601545832, 'init_value': -197.59384155273438, 'ave_value': -75.53064491262307} step=14022
2022-04-20 17:14.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.12 [info     ] CQL_20220420170755: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00030682240313256693, 'time_algorithm_update': 0.025443941529034175, 'temp_loss': -0.19484965906788906, 'temp': 0.9948790089080208, 'alpha_loss': 6.248040484406098, 'alpha': 0.29194759425015476, 'critic_loss': 1982.2154951485975, 'actor_loss': 89.04970668770416, 'time_step': 0.025850616700468006, 'td_error': 158.70650702894832, 'init_value': -205.67562866210938, 'ave_value': -78.16438547543578} step=14364
2022-04-20 17:14.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.21 [info     ] CQL_20220420170755: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003065895615962514, 'time_algorithm_update': 0.02533761381405836, 'temp_loss': -0.1571959671264852, 'temp': 0.9968826828295725, 'alpha_loss': 6.000042970417536, 'alpha': 0.28295113817293044, 'critic_loss': 2038.4322895250823, 'actor_loss': 91.53187967043871, 'time_step': 0.025740012787936025, 'td_error': 150.8475047384006, 'init_value': -205.99697875976562, 'ave_value': -77.97411227535557} step=14706
2022-04-20 17:14.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.30 [info     ] CQL_20220420170755: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000309887685273823, 'time_algorithm_update': 0.02548182707780983, 'temp_loss': -0.16063194468138162, 'temp': 0.9987156821621789, 'alpha_loss': 5.827154699124788, 'alpha': 0.2743284508847354, 'critic_loss': 2090.7697118569536, 'actor_loss': 93.63576793112951, 'time_step': 0.0258863760016815, 'td_error': 164.51140730474745, 'init_value': -214.5202178955078, 'ave_value': -82.39313709151638} step=15048
2022-04-20 17:14.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.40 [info     ] CQL_20220420170755: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003055926651982536, 'time_algorithm_update': 0.025169824299059416, 'temp_loss': -0.11382409889926338, 'temp': 1.0003106330918987, 'alpha_loss': 5.657762111976132, 'alpha': 0.2659849621224822, 'critic_loss': 2147.9920079638387, 'actor_loss': 96.14101914076777, 'time_step': 0.025574074851142034, 'td_error': 173.54461557342367, 'init_value': -217.1767120361328, 'ave_value': -83.91523822406391} step=15390
2022-04-20 17:14.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.49 [info     ] CQL_20220420170755: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003097280424240737, 'time_algorithm_update': 0.025594697361103973, 'temp_loss': -0.08754001831232805, 'temp': 1.0013951296694794, 'alpha_loss': 5.749946304929186, 'alpha': 0.2577307880447622, 'critic_loss': 2200.328416612413, 'actor_loss': 98.41423556679173, 'time_step': 0.026000210416247272, 'td_error': 165.59534420239743, 'init_value': -223.80966186523438, 'ave_value': -86.23992129392452} step=15732
2022-04-20 17:14.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:14.58 [info     ] CQL_20220420170755: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003056261274549696, 'time_algorithm_update': 0.025119008376584415, 'temp_loss': -0.09527141173854906, 'temp': 1.0025457792114794, 'alpha_loss': 5.632913525341547, 'alpha': 0.24961924457062057, 'critic_loss': 2254.1263984546326, 'actor_loss': 100.70613591155113, 'time_step': 0.025519005039282013, 'td_error': 182.72502014211747, 'init_value': -230.0730438232422, 'ave_value': -88.5212770119551} step=16074
2022-04-20 17:14.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.07 [info     ] CQL_20220420170755: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00030947916688974837, 'time_algorithm_update': 0.02531225569764076, 'temp_loss': -0.06585459915474492, 'temp': 1.0035605420146072, 'alpha_loss': 5.5366350098660115, 'alpha': 0.2417200849505893, 'critic_loss': 2308.3689639442846, 'actor_loss': 102.90781311124388, 'time_step': 0.025718074095876592, 'td_error': 191.1829860472895, 'init_value': -237.9459991455078, 'ave_value': -91.63151743449606} step=16416
2022-04-20 17:15.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.16 [info     ] CQL_20220420170755: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003117727257354915, 'time_algorithm_update': 0.025215058995966325, 'temp_loss': -0.09071625968474045, 'temp': 1.0046081996103475, 'alpha_loss': 5.227492097525569, 'alpha': 0.23416751827935725, 'critic_loss': 2359.1930177922836, 'actor_loss': 105.13506883766219, 'time_step': 0.025626779996860794, 'td_error': 177.4235356207895, 'init_value': -242.68667602539062, 'ave_value': -93.94746036975233} step=16758
2022-04-20 17:15.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:15.26 [info     ] CQL_20220420170755: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00030369856204205784, 'time_algorithm_update': 0.025346097890396563, 'temp_loss': -0.08884504809975624, 'temp': 1.0058717936800237, 'alpha_loss': 5.028541076253032, 'alpha': 0.2271319141513423, 'critic_loss': 2411.7394698293588, 'actor_loss': 107.37676082856474, 'time_step': 0.025743467765941955, 'td_error': 232.46777326556094, 'init_value': -244.70907592773438, 'ave_value': -93.58426025008296} step=17100
2022-04-20 17:15.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420170755/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:15.27 [info     ] FQE_20220420171526: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001528435442821089, 'time_algorithm_update': 0.0034338752907442757, 'loss': 0.007547229313830206, 'time_step': 0.0036557841013713055, 'init_value': -0.2845044732093811, 'ave_value': -0.2549710463959019, 'soft_opc': nan} step=166




2022-04-20 17:15.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.27 [info     ] FQE_20220420171526: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015307765409170864, 'time_algorithm_update': 0.004450912935188018, 'loss': 0.005974417156820376, 'time_step': 0.004677541284676057, 'init_value': -0.3761371374130249, 'ave_value': -0.3126249710165031, 'soft_opc': nan} step=332




2022-04-20 17:15.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.28 [info     ] FQE_20220420171526: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001532212797417698, 'time_algorithm_update': 0.004480412207454084, 'loss': 0.005477062064638159, 'time_step': 0.004702929990837373, 'init_value': -0.4144620895385742, 'ave_value': -0.3480043880834437, 'soft_opc': nan} step=498




2022-04-20 17:15.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.29 [info     ] FQE_20220420171526: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015362917658794358, 'time_algorithm_update': 0.0042000664285866615, 'loss': 0.0054440053196691245, 'time_step': 0.004422712038798505, 'init_value': -0.4701577425003052, 'ave_value': -0.38124863409881926, 'soft_opc': nan} step=664




2022-04-20 17:15.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.30 [info     ] FQE_20220420171526: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001527573688920722, 'time_algorithm_update': 0.0039928849921169055, 'loss': 0.005268143592346235, 'time_step': 0.004216751420354268, 'init_value': -0.5175232887268066, 'ave_value': -0.41767408844788334, 'soft_opc': nan} step=830




2022-04-20 17:15.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.31 [info     ] FQE_20220420171526: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015626614352306687, 'time_algorithm_update': 0.004440161119024438, 'loss': 0.005058878024265799, 'time_step': 0.004667891077248447, 'init_value': -0.5301144123077393, 'ave_value': -0.42663092954872966, 'soft_opc': nan} step=996




2022-04-20 17:15.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.31 [info     ] FQE_20220420171526: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001520047704857516, 'time_algorithm_update': 0.004370746842349868, 'loss': 0.004919397817217711, 'time_step': 0.00459122801401529, 'init_value': -0.570686399936676, 'ave_value': -0.4418034530286719, 'soft_opc': nan} step=1162




2022-04-20 17:15.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.32 [info     ] FQE_20220420171526: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001554216247007071, 'time_algorithm_update': 0.0035694148167070136, 'loss': 0.004837117960160396, 'time_step': 0.0037946629236979656, 'init_value': -0.589102029800415, 'ave_value': -0.44539528374333637, 'soft_opc': nan} step=1328




2022-04-20 17:15.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.33 [info     ] FQE_20220420171526: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015348411468138178, 'time_algorithm_update': 0.004483982741114605, 'loss': 0.004562627388085856, 'time_step': 0.004705535360129483, 'init_value': -0.57539963722229, 'ave_value': -0.4350097709561925, 'soft_opc': nan} step=1494




2022-04-20 17:15.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.34 [info     ] FQE_20220420171526: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015441624515027884, 'time_algorithm_update': 0.004436632236802435, 'loss': 0.0047541986197143435, 'time_step': 0.004663881049098739, 'init_value': -0.5979069471359253, 'ave_value': -0.43620858368975624, 'soft_opc': nan} step=1660




2022-04-20 17:15.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.35 [info     ] FQE_20220420171526: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015370098941297415, 'time_algorithm_update': 0.004265460623316018, 'loss': 0.004590175141221339, 'time_step': 0.004488088998449854, 'init_value': -0.6199972629547119, 'ave_value': -0.43970605074050456, 'soft_opc': nan} step=1826




2022-04-20 17:15.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.35 [info     ] FQE_20220420171526: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015524496515113186, 'time_algorithm_update': 0.0038532305912799144, 'loss': 0.004503119358209422, 'time_step': 0.004076727901596621, 'init_value': -0.6124475002288818, 'ave_value': -0.4193508357547962, 'soft_opc': nan} step=1992




2022-04-20 17:15.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.36 [info     ] FQE_20220420171526: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015374264085149192, 'time_algorithm_update': 0.004447970045618264, 'loss': 0.0044514040163920136, 'time_step': 0.004673347415694271, 'init_value': -0.6305869817733765, 'ave_value': -0.4172636706524671, 'soft_opc': nan} step=2158




2022-04-20 17:15.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.37 [info     ] FQE_20220420171526: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001553699194666851, 'time_algorithm_update': 0.004483037684337202, 'loss': 0.004500901035498828, 'time_step': 0.004707393876041274, 'init_value': -0.6585946679115295, 'ave_value': -0.41378555616682716, 'soft_opc': nan} step=2324




2022-04-20 17:15.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.38 [info     ] FQE_20220420171526: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014952291925269436, 'time_algorithm_update': 0.0034658822668604105, 'loss': 0.004452637587505651, 'time_step': 0.003684435982302011, 'init_value': -0.6578193306922913, 'ave_value': -0.40509182895357543, 'soft_opc': nan} step=2490




2022-04-20 17:15.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.39 [info     ] FQE_20220420171526: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016045283122235034, 'time_algorithm_update': 0.00453354938920722, 'loss': 0.004555528066177146, 'time_step': 0.004763565867780203, 'init_value': -0.6733085513114929, 'ave_value': -0.397605242882226, 'soft_opc': nan} step=2656




2022-04-20 17:15.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.40 [info     ] FQE_20220420171526: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015318968209875636, 'time_algorithm_update': 0.004517088453453708, 'loss': 0.004529226559399436, 'time_step': 0.004740456500685358, 'init_value': -0.725970447063446, 'ave_value': -0.4160096051087519, 'soft_opc': nan} step=2822




2022-04-20 17:15.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.40 [info     ] FQE_20220420171526: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015307047280920557, 'time_algorithm_update': 0.004320763679872076, 'loss': 0.004754208174186298, 'time_step': 0.0045429496880037236, 'init_value': -0.7483808994293213, 'ave_value': -0.41494833289033123, 'soft_opc': nan} step=2988




2022-04-20 17:15.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.41 [info     ] FQE_20220420171526: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015367513679596316, 'time_algorithm_update': 0.003723200545253524, 'loss': 0.004729717664128865, 'time_step': 0.003942910447178117, 'init_value': -0.799861490726471, 'ave_value': -0.4347371824186396, 'soft_opc': nan} step=3154




2022-04-20 17:15.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.42 [info     ] FQE_20220420171526: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001557103122573301, 'time_algorithm_update': 0.004480604665825166, 'loss': 0.005594649267276323, 'time_step': 0.004708896200340915, 'init_value': -0.8920232057571411, 'ave_value': -0.5191245204248877, 'soft_opc': nan} step=3320




2022-04-20 17:15.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.43 [info     ] FQE_20220420171526: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015303600265319088, 'time_algorithm_update': 0.004491343555680241, 'loss': 0.005508219287450235, 'time_step': 0.004713739257260978, 'init_value': -0.9304492473602295, 'ave_value': -0.5367989239509444, 'soft_opc': nan} step=3486




2022-04-20 17:15.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.43 [info     ] FQE_20220420171526: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001534482082688665, 'time_algorithm_update': 0.0035648015608270483, 'loss': 0.00635328568119825, 'time_step': 0.0037927871727081665, 'init_value': -1.0172957181930542, 'ave_value': -0.5933715312515159, 'soft_opc': nan} step=3652




2022-04-20 17:15.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.44 [info     ] FQE_20220420171526: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001547867993274367, 'time_algorithm_update': 0.004508108977811882, 'loss': 0.006858165743368038, 'time_step': 0.004732694970556052, 'init_value': -1.0199003219604492, 'ave_value': -0.567913173189437, 'soft_opc': nan} step=3818




2022-04-20 17:15.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.45 [info     ] FQE_20220420171526: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015626039849706442, 'time_algorithm_update': 0.004466533660888672, 'loss': 0.006999740621119917, 'time_step': 0.004696137933845979, 'init_value': -1.0659172534942627, 'ave_value': -0.6160968599530557, 'soft_opc': nan} step=3984




2022-04-20 17:15.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.46 [info     ] FQE_20220420171526: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015472360404140978, 'time_algorithm_update': 0.004398571439536221, 'loss': 0.007591740425300212, 'time_step': 0.00462709852011807, 'init_value': -1.09930419921875, 'ave_value': -0.6140043939295988, 'soft_opc': nan} step=4150




2022-04-20 17:15.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.47 [info     ] FQE_20220420171526: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015204929443727057, 'time_algorithm_update': 0.0038165816341538027, 'loss': 0.007806642159135137, 'time_step': 0.004042366901075983, 'init_value': -1.1608513593673706, 'ave_value': -0.6649734899519129, 'soft_opc': nan} step=4316




2022-04-20 17:15.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.48 [info     ] FQE_20220420171526: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015458285090434983, 'time_algorithm_update': 0.004371936062732375, 'loss': 0.008478436506835812, 'time_step': 0.00460019743586161, 'init_value': -1.181782603263855, 'ave_value': -0.6450628473725472, 'soft_opc': nan} step=4482




2022-04-20 17:15.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.48 [info     ] FQE_20220420171526: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015196742781673568, 'time_algorithm_update': 0.004464669399950878, 'loss': 0.00933399956171255, 'time_step': 0.004687432783195771, 'init_value': -1.2571275234222412, 'ave_value': -0.7104149746670816, 'soft_opc': nan} step=4648




2022-04-20 17:15.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.49 [info     ] FQE_20220420171526: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015401409333010754, 'time_algorithm_update': 0.00368424639644393, 'loss': 0.009986079353362277, 'time_step': 0.003910440996468785, 'init_value': -1.3619848489761353, 'ave_value': -0.7510977389710444, 'soft_opc': nan} step=4814




2022-04-20 17:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.50 [info     ] FQE_20220420171526: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015551067260374506, 'time_algorithm_update': 0.004395016704697207, 'loss': 0.010667638679971942, 'time_step': 0.004622335893562041, 'init_value': -1.3948228359222412, 'ave_value': -0.7796482362876739, 'soft_opc': nan} step=4980




2022-04-20 17:15.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.51 [info     ] FQE_20220420171526: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015585968293339373, 'time_algorithm_update': 0.004477752260414951, 'loss': 0.01136895205688113, 'time_step': 0.004707063537046134, 'init_value': -1.3854284286499023, 'ave_value': -0.7394201968637135, 'soft_opc': nan} step=5146




2022-04-20 17:15.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.52 [info     ] FQE_20220420171526: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015561695558479033, 'time_algorithm_update': 0.004419637013630694, 'loss': 0.01225126784923206, 'time_step': 0.004648830517228827, 'init_value': -1.495105266571045, 'ave_value': -0.8106192545688434, 'soft_opc': nan} step=5312




2022-04-20 17:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.52 [info     ] FQE_20220420171526: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015263528708952018, 'time_algorithm_update': 0.0036822456911385776, 'loss': 0.013056864573463169, 'time_step': 0.003908723233694054, 'init_value': -1.5969030857086182, 'ave_value': -0.8920102319126395, 'soft_opc': nan} step=5478




2022-04-20 17:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.53 [info     ] FQE_20220420171526: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016055049666439193, 'time_algorithm_update': 0.0044961779950612995, 'loss': 0.014299823352730417, 'time_step': 0.004729974700743894, 'init_value': -1.657489538192749, 'ave_value': -0.9053309980477836, 'soft_opc': nan} step=5644




2022-04-20 17:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.54 [info     ] FQE_20220420171526: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015884278768516448, 'time_algorithm_update': 0.004465334386710661, 'loss': 0.014939912741425929, 'time_step': 0.004696000053221921, 'init_value': -1.7808997631072998, 'ave_value': -0.997237204274515, 'soft_opc': nan} step=5810




2022-04-20 17:15.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.55 [info     ] FQE_20220420171526: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015524352889463125, 'time_algorithm_update': 0.0037993810263024755, 'loss': 0.015890543638021653, 'time_step': 0.00402379035949707, 'init_value': -1.7944175004959106, 'ave_value': -0.9728301996432311, 'soft_opc': nan} step=5976




2022-04-20 17:15.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.56 [info     ] FQE_20220420171526: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015291104833763767, 'time_algorithm_update': 0.0044978052736764935, 'loss': 0.016760649086615766, 'time_step': 0.0047211474682911335, 'init_value': -1.8629658222198486, 'ave_value': -0.9969902127007911, 'soft_opc': nan} step=6142




2022-04-20 17:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.57 [info     ] FQE_20220420171526: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015550205506474138, 'time_algorithm_update': 0.0044820940638163, 'loss': 0.01796376133402136, 'time_step': 0.00470818238086011, 'init_value': -2.0210859775543213, 'ave_value': -1.1235554722951608, 'soft_opc': nan} step=6308




2022-04-20 17:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.57 [info     ] FQE_20220420171526: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016124277229768685, 'time_algorithm_update': 0.004436581967824913, 'loss': 0.01918699243105948, 'time_step': 0.0046706774148596334, 'init_value': -2.071117877960205, 'ave_value': -1.1319523134654774, 'soft_opc': nan} step=6474




2022-04-20 17:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.58 [info     ] FQE_20220420171526: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001526266695505165, 'time_algorithm_update': 0.0038023756211062513, 'loss': 0.019970009320819503, 'time_step': 0.004025133259325142, 'init_value': -2.0327138900756836, 'ave_value': -1.0406975922023727, 'soft_opc': nan} step=6640




2022-04-20 17:15.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:15.59 [info     ] FQE_20220420171526: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015509559447506824, 'time_algorithm_update': 0.0044347378144781275, 'loss': 0.021147725009639937, 'time_step': 0.004663997385875288, 'init_value': -2.1098625659942627, 'ave_value': -1.060197234626121, 'soft_opc': nan} step=6806




2022-04-20 17:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.00 [info     ] FQE_20220420171526: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015719683773546335, 'time_algorithm_update': 0.004472300230738628, 'loss': 0.022685511688511056, 'time_step': 0.0047024387911141635, 'init_value': -2.2240350246429443, 'ave_value': -1.1136353344799161, 'soft_opc': nan} step=6972




2022-04-20 17:16.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.01 [info     ] FQE_20220420171526: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001549476600555052, 'time_algorithm_update': 0.0037922399589814335, 'loss': 0.022277818656111337, 'time_step': 0.00402228659894093, 'init_value': -2.2673873901367188, 'ave_value': -1.1142870590409584, 'soft_opc': nan} step=7138




2022-04-20 17:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.01 [info     ] FQE_20220420171526: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015868910823959903, 'time_algorithm_update': 0.004482122788946313, 'loss': 0.024585342681293476, 'time_step': 0.004713788089981998, 'init_value': -2.4200966358184814, 'ave_value': -1.2200536622993043, 'soft_opc': nan} step=7304




2022-04-20 17:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.02 [info     ] FQE_20220420171526: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015451534684882107, 'time_algorithm_update': 0.004488122032349368, 'loss': 0.026893408618396127, 'time_step': 0.004714279289705208, 'init_value': -2.4762439727783203, 'ave_value': -1.2297519621615474, 'soft_opc': nan} step=7470




2022-04-20 17:16.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.03 [info     ] FQE_20220420171526: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016038963593632342, 'time_algorithm_update': 0.0043596129819571255, 'loss': 0.028253945607568574, 'time_step': 0.004595018294920404, 'init_value': -2.5384387969970703, 'ave_value': -1.2090256656125724, 'soft_opc': nan} step=7636




2022-04-20 17:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.04 [info     ] FQE_20220420171526: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015196024653423265, 'time_algorithm_update': 0.003733981086547116, 'loss': 0.029445891988082194, 'time_step': 0.003957930817661515, 'init_value': -2.5822031497955322, 'ave_value': -1.2441928563773765, 'soft_opc': nan} step=7802




2022-04-20 17:16.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.05 [info     ] FQE_20220420171526: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015483419579195688, 'time_algorithm_update': 0.004511459764227809, 'loss': 0.03064987310825522, 'time_step': 0.0047352931585656595, 'init_value': -2.713144302368164, 'ave_value': -1.3061419035173818, 'soft_opc': nan} step=7968




2022-04-20 17:16.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.06 [info     ] FQE_20220420171526: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015456417956984187, 'time_algorithm_update': 0.004451958529920463, 'loss': 0.03187942030525441, 'time_step': 0.004677261214658439, 'init_value': -2.818601131439209, 'ave_value': -1.3551958063785154, 'soft_opc': nan} step=8134




2022-04-20 17:16.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:16.06 [info     ] FQE_20220420171526: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015277316771357893, 'time_algorithm_update': 0.003813523844064, 'loss': 0.03304335249150284, 'time_step': 0.004038101219269167, 'init_value': -2.8021750450134277, 'ave_value': -1.307037582295435, 'soft_opc': nan} step=8300




2022-04-20 17:16.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171526/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 17:16.07 [debug    ] RoundIterator is selected.
2022-04-20 17:16.07 [info     ] Directory is created at d3rlpy_logs/FQE_20220420171607
2022-04-20 17:16.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:16.07 [debug    ] Building models...
2022-04-20 17:16.07 [debug    ] Models have been built.
2022-04-20 17:16.07 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420171607/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:16.09 [info     ] FQE_20220420171607: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.000165035558301349, 'time_algorithm_update': 0.004539139048997746, 'loss': 0.022676890388369387, 'time_step': 0.004779104576554409, 'init_value': -1.4397823810577393, 'ave_value': -1.4442904170569, 'soft_opc': nan} step=344




2022-04-20 17:16.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.10 [info     ] FQE_20220420171607: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015951866327330123, 'time_algorithm_update': 0.004089440024176309, 'loss': 0.021446487668706753, 'time_step': 0.004322724979977275, 'init_value': -2.236819267272949, 'ave_value': -2.2256884468434093, 'soft_opc': nan} step=688




2022-04-20 17:16.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.12 [info     ] FQE_20220420171607: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015906469766483752, 'time_algorithm_update': 0.004129248996113622, 'loss': 0.02591716593163911, 'time_step': 0.004360386105470879, 'init_value': -3.151956796646118, 'ave_value': -3.1424650301804413, 'soft_opc': nan} step=1032




2022-04-20 17:16.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.14 [info     ] FQE_20220420171607: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016478605048601017, 'time_algorithm_update': 0.004490055317102477, 'loss': 0.02953000845885727, 'time_step': 0.004727229822513669, 'init_value': -3.79213285446167, 'ave_value': -3.819410398521939, 'soft_opc': nan} step=1376




2022-04-20 17:16.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.15 [info     ] FQE_20220420171607: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015966836796250453, 'time_algorithm_update': 0.004069906334544337, 'loss': 0.03805704570245431, 'time_step': 0.004303989715354387, 'init_value': -4.670290946960449, 'ave_value': -4.751651237059284, 'soft_opc': nan} step=1720




2022-04-20 17:16.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.17 [info     ] FQE_20220420171607: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016177532284758812, 'time_algorithm_update': 0.0044824467148891715, 'loss': 0.045768174702886404, 'time_step': 0.004717833774034367, 'init_value': -5.004853248596191, 'ave_value': -5.169375481503504, 'soft_opc': nan} step=2064




2022-04-20 17:16.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.19 [info     ] FQE_20220420171607: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001639356446820636, 'time_algorithm_update': 0.0040732532046562015, 'loss': 0.054558168782657665, 'time_step': 0.004310957221097724, 'init_value': -5.758167266845703, 'ave_value': -6.036476271834459, 'soft_opc': nan} step=2408




2022-04-20 17:16.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.20 [info     ] FQE_20220420171607: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016510486602783203, 'time_algorithm_update': 0.0043669044971466064, 'loss': 0.06434708879448386, 'time_step': 0.004604650791301284, 'init_value': -5.8911285400390625, 'ave_value': -6.3801926680513334, 'soft_opc': nan} step=2752




2022-04-20 17:16.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.22 [info     ] FQE_20220420171607: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016085214393083438, 'time_algorithm_update': 0.0042873011078945426, 'loss': 0.07316091486122893, 'time_step': 0.004523399957390719, 'init_value': -6.047929763793945, 'ave_value': -6.732237241558127, 'soft_opc': nan} step=3096




2022-04-20 17:16.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.24 [info     ] FQE_20220420171607: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016277543334073798, 'time_algorithm_update': 0.00398937976637552, 'loss': 0.08737473189289313, 'time_step': 0.004225474457408107, 'init_value': -6.276991844177246, 'ave_value': -7.3381830337885265, 'soft_opc': nan} step=3440




2022-04-20 17:16.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.25 [info     ] FQE_20220420171607: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016204700913540152, 'time_algorithm_update': 0.004459225854208303, 'loss': 0.09641319580733516, 'time_step': 0.004694127759268117, 'init_value': -6.481459617614746, 'ave_value': -7.8099647932761425, 'soft_opc': nan} step=3784




2022-04-20 17:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.27 [info     ] FQE_20220420171607: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016178086746570676, 'time_algorithm_update': 0.004040770752485408, 'loss': 0.10799425281042796, 'time_step': 0.00427376669506694, 'init_value': -6.518930912017822, 'ave_value': -8.281178844646291, 'soft_opc': nan} step=4128




2022-04-20 17:16.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.29 [info     ] FQE_20220420171607: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016321345817211063, 'time_algorithm_update': 0.004502474568610968, 'loss': 0.11881759082681911, 'time_step': 0.004738555398098258, 'init_value': -6.549778938293457, 'ave_value': -8.598404019668296, 'soft_opc': nan} step=4472




2022-04-20 17:16.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.30 [info     ] FQE_20220420171607: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015844231428102006, 'time_algorithm_update': 0.0031417095383932422, 'loss': 0.1263595025629065, 'time_step': 0.0033740671568138654, 'init_value': -6.692722320556641, 'ave_value': -9.091726721997734, 'soft_opc': nan} step=4816




2022-04-20 17:16.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.31 [info     ] FQE_20220420171607: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015811310258022574, 'time_algorithm_update': 0.003456245328104773, 'loss': 0.13558668249048467, 'time_step': 0.003683097140733586, 'init_value': -6.53488826751709, 'ave_value': -9.14879717465725, 'soft_opc': nan} step=5160




2022-04-20 17:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.33 [info     ] FQE_20220420171607: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016261741172435672, 'time_algorithm_update': 0.0035195322923882062, 'loss': 0.14192278306324815, 'time_step': 0.0037552478701569315, 'init_value': -7.076873779296875, 'ave_value': -9.801492722265355, 'soft_opc': nan} step=5504




2022-04-20 17:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.34 [info     ] FQE_20220420171607: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016125620797623034, 'time_algorithm_update': 0.003440289996391119, 'loss': 0.14882983057759702, 'time_step': 0.00367620587348938, 'init_value': -6.978515148162842, 'ave_value': -9.903005397937319, 'soft_opc': nan} step=5848




2022-04-20 17:16.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.35 [info     ] FQE_20220420171607: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001601618389750636, 'time_algorithm_update': 0.003369115812833919, 'loss': 0.15704728375990376, 'time_step': 0.003600175297537515, 'init_value': -7.135053634643555, 'ave_value': -10.201395796152116, 'soft_opc': nan} step=6192




2022-04-20 17:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.37 [info     ] FQE_20220420171607: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001617358174434928, 'time_algorithm_update': 0.0034517694351285004, 'loss': 0.1602536801541181, 'time_step': 0.0036850003309028094, 'init_value': -7.351619243621826, 'ave_value': -10.557331831108879, 'soft_opc': nan} step=6536




2022-04-20 17:16.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.38 [info     ] FQE_20220420171607: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016044669373090878, 'time_algorithm_update': 0.0033782907696657404, 'loss': 0.16551073068525468, 'time_step': 0.003612327714299047, 'init_value': -7.409758567810059, 'ave_value': -10.776677710777737, 'soft_opc': nan} step=6880




2022-04-20 17:16.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.40 [info     ] FQE_20220420171607: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015775200932524925, 'time_algorithm_update': 0.0034410558467687564, 'loss': 0.1746991939550309, 'time_step': 0.003670220458230307, 'init_value': -7.6584014892578125, 'ave_value': -11.274277013258354, 'soft_opc': nan} step=7224




2022-04-20 17:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.41 [info     ] FQE_20220420171607: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016088263933048693, 'time_algorithm_update': 0.0034432501293892082, 'loss': 0.18003733404695468, 'time_step': 0.0036763327066288436, 'init_value': -7.882464408874512, 'ave_value': -11.525681110545321, 'soft_opc': nan} step=7568




2022-04-20 17:16.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.42 [info     ] FQE_20220420171607: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015942509784254918, 'time_algorithm_update': 0.003369697997736376, 'loss': 0.18487801976220378, 'time_step': 0.003604468218115873, 'init_value': -8.292707443237305, 'ave_value': -12.087815393601453, 'soft_opc': nan} step=7912




2022-04-20 17:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.44 [info     ] FQE_20220420171607: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016060402227002522, 'time_algorithm_update': 0.003396455631699673, 'loss': 0.19425715726453724, 'time_step': 0.0036299214806667594, 'init_value': -8.510573387145996, 'ave_value': -12.441565527545439, 'soft_opc': nan} step=8256




2022-04-20 17:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.45 [info     ] FQE_20220420171607: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001628128595130388, 'time_algorithm_update': 0.003417963898459146, 'loss': 0.2009427054963876, 'time_step': 0.0036528249119603356, 'init_value': -8.663467407226562, 'ave_value': -12.7165004859368, 'soft_opc': nan} step=8600




2022-04-20 17:16.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.46 [info     ] FQE_20220420171607: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016248711319856866, 'time_algorithm_update': 0.003358696782311728, 'loss': 0.21232444958133234, 'time_step': 0.0035947970179624335, 'init_value': -9.064029693603516, 'ave_value': -13.19914317525722, 'soft_opc': nan} step=8944




2022-04-20 17:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.48 [info     ] FQE_20220420171607: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.000162485727044039, 'time_algorithm_update': 0.00340872795082802, 'loss': 0.21666120577516945, 'time_step': 0.003643355397290962, 'init_value': -8.892391204833984, 'ave_value': -13.055316502949944, 'soft_opc': nan} step=9288




2022-04-20 17:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.49 [info     ] FQE_20220420171607: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016057976456575616, 'time_algorithm_update': 0.003470049347988395, 'loss': 0.21878830304499283, 'time_step': 0.0037021463693574417, 'init_value': -9.360916137695312, 'ave_value': -13.653636470732389, 'soft_opc': nan} step=9632




2022-04-20 17:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.50 [info     ] FQE_20220420171607: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015964757564455965, 'time_algorithm_update': 0.003381812295248342, 'loss': 0.22915060851766272, 'time_step': 0.0036130422769590866, 'init_value': -9.115196228027344, 'ave_value': -13.601784455534574, 'soft_opc': nan} step=9976




2022-04-20 17:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.52 [info     ] FQE_20220420171607: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001611030379007029, 'time_algorithm_update': 0.0035021076368731124, 'loss': 0.23174252823200944, 'time_step': 0.0037392537261164466, 'init_value': -9.277657508850098, 'ave_value': -13.734057860344917, 'soft_opc': nan} step=10320




2022-04-20 17:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.53 [info     ] FQE_20220420171607: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016509793525518372, 'time_algorithm_update': 0.003411344317502754, 'loss': 0.2423813368388733, 'time_step': 0.003649510616479918, 'init_value': -9.406003952026367, 'ave_value': -13.849945754228948, 'soft_opc': nan} step=10664




2022-04-20 17:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.55 [info     ] FQE_20220420171607: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015868281209191612, 'time_algorithm_update': 0.0034511158632677657, 'loss': 0.2487520667680994, 'time_step': 0.0036805126556130343, 'init_value': -9.389179229736328, 'ave_value': -13.861995205484533, 'soft_opc': nan} step=11008




2022-04-20 17:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.56 [info     ] FQE_20220420171607: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001611286817595016, 'time_algorithm_update': 0.003476289815680925, 'loss': 0.25525416480106583, 'time_step': 0.0037111868691998857, 'init_value': -9.5596342086792, 'ave_value': -13.94031866800275, 'soft_opc': nan} step=11352




2022-04-20 17:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.57 [info     ] FQE_20220420171607: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015847488891246707, 'time_algorithm_update': 0.003456689590631529, 'loss': 0.26588355502665995, 'time_step': 0.003687576499096183, 'init_value': -9.896291732788086, 'ave_value': -14.257523220750663, 'soft_opc': nan} step=11696




2022-04-20 17:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:16.59 [info     ] FQE_20220420171607: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015800221021785291, 'time_algorithm_update': 0.003425125465836636, 'loss': 0.2708397216033624, 'time_step': 0.0036545160204865213, 'init_value': -9.888957977294922, 'ave_value': -14.24960235169342, 'soft_opc': nan} step=12040




2022-04-20 17:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.00 [info     ] FQE_20220420171607: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001605853091838748, 'time_algorithm_update': 0.0034168418063673864, 'loss': 0.27680239410594454, 'time_step': 0.0036527847134789756, 'init_value': -9.986227989196777, 'ave_value': -14.265482341880734, 'soft_opc': nan} step=12384




2022-04-20 17:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.02 [info     ] FQE_20220420171607: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016042590141296387, 'time_algorithm_update': 0.00342278286468151, 'loss': 0.28846153587395285, 'time_step': 0.003658036852991858, 'init_value': -10.457880973815918, 'ave_value': -14.567631450033671, 'soft_opc': nan} step=12728




2022-04-20 17:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.03 [info     ] FQE_20220420171607: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001600585704626039, 'time_algorithm_update': 0.003469594689302666, 'loss': 0.29909869271525463, 'time_step': 0.0037031478660051213, 'init_value': -10.396526336669922, 'ave_value': -14.635971122966692, 'soft_opc': nan} step=13072




2022-04-20 17:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.04 [info     ] FQE_20220420171607: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016296602958856628, 'time_algorithm_update': 0.0028856057067250095, 'loss': 0.3084419271204794, 'time_step': 0.0031209387058435483, 'init_value': -10.9216947555542, 'ave_value': -15.007742931002433, 'soft_opc': nan} step=13416




2022-04-20 17:17.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.05 [info     ] FQE_20220420171607: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015830508498258368, 'time_algorithm_update': 0.00308796763420105, 'loss': 0.3245436087423979, 'time_step': 0.003322168145068856, 'init_value': -11.050203323364258, 'ave_value': -15.123161418621873, 'soft_opc': nan} step=13760




2022-04-20 17:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.07 [info     ] FQE_20220420171607: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001630658327147018, 'time_algorithm_update': 0.003379845341970754, 'loss': 0.3383336883920873, 'time_step': 0.0036158935968265975, 'init_value': -11.436330795288086, 'ave_value': -15.322136314790528, 'soft_opc': nan} step=14104




2022-04-20 17:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.08 [info     ] FQE_20220420171607: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015855528587518738, 'time_algorithm_update': 0.003410996392715809, 'loss': 0.34127742536961597, 'time_step': 0.003643374110377112, 'init_value': -11.55665111541748, 'ave_value': -15.397601919970265, 'soft_opc': nan} step=14448




2022-04-20 17:17.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.09 [info     ] FQE_20220420171607: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001595200494278309, 'time_algorithm_update': 0.0034319772276767465, 'loss': 0.36032178369621465, 'time_step': 0.003666806359623754, 'init_value': -11.758251190185547, 'ave_value': -15.45494914311278, 'soft_opc': nan} step=14792




2022-04-20 17:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.11 [info     ] FQE_20220420171607: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001592303431311319, 'time_algorithm_update': 0.0034468548242435897, 'loss': 0.37341582222549285, 'time_step': 0.003677418065625568, 'init_value': -12.169084548950195, 'ave_value': -15.59490920758999, 'soft_opc': nan} step=15136




2022-04-20 17:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.12 [info     ] FQE_20220420171607: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016018678975659748, 'time_algorithm_update': 0.003405345040698384, 'loss': 0.39060873920801853, 'time_step': 0.0036368314609971156, 'init_value': -12.527885437011719, 'ave_value': -15.903649757158112, 'soft_opc': nan} step=15480




2022-04-20 17:17.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.14 [info     ] FQE_20220420171607: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001577048800712408, 'time_algorithm_update': 0.003411954918573069, 'loss': 0.40130089253706985, 'time_step': 0.003642113402832386, 'init_value': -12.881361961364746, 'ave_value': -16.03267813910235, 'soft_opc': nan} step=15824




2022-04-20 17:17.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.15 [info     ] FQE_20220420171607: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015646288561266521, 'time_algorithm_update': 0.0033626057380853696, 'loss': 0.41978676338560966, 'time_step': 0.003589670325434485, 'init_value': -12.99079704284668, 'ave_value': -16.055959895494823, 'soft_opc': nan} step=16168




2022-04-20 17:17.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.16 [info     ] FQE_20220420171607: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015843676966290142, 'time_algorithm_update': 0.003435480040173198, 'loss': 0.4368055599665832, 'time_step': 0.003665997538455697, 'init_value': -13.494976043701172, 'ave_value': -16.513643839096165, 'soft_opc': nan} step=16512




2022-04-20 17:17.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.18 [info     ] FQE_20220420171607: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015700417895649755, 'time_algorithm_update': 0.003415495850319086, 'loss': 0.44161089538694037, 'time_step': 0.0036436374797377477, 'init_value': -12.849117279052734, 'ave_value': -16.100993259694125, 'soft_opc': nan} step=16856




2022-04-20 17:17.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:17.19 [info     ] FQE_20220420171607: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015971411106198332, 'time_algorithm_update': 0.0034355098424955857, 'loss': 0.4384543190082145, 'time_step': 0.003670681354611419, 'init_value': -13.066444396972656, 'ave_value': -16.23089710692833, 'soft_opc': nan} step=17200




2022-04-20 17:17.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420171607/model_17200.pt
most optimal hyper params for cql at this point:  [0.00459858087145252, 0.00387744425285509, 1.2844250312785716e-05, 1]
search iteration:  12
using hyper params:  [0.00818937760098772, 0.0013282750444488331, 6.420935256253267e-05, 5]
2022-04-20 17:17.19 [debug    ] RoundIterator is selected.
2022-04-20 17:17.19 [info     ] Directory is created at d3rlpy_logs/CQL_20220420171719
2022-04-20 17:17.19 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:17.19 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:17.19 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420171719/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_lea

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:17.28 [info     ] CQL_20220420171719: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003543395047996476, 'time_algorithm_update': 0.023825202071875856, 'temp_loss': 4.530162258454931, 'temp': 0.989775394138537, 'alpha_loss': -18.69458702433179, 'alpha': 1.018134745938039, 'critic_loss': 64.81032593487299, 'actor_loss': 3.080217130090061, 'time_step': 0.024275675154568858, 'td_error': 3.788796281373766, 'init_value': -6.0945048332214355, 'ave_value': -4.811489959906216} step=342
2022-04-20 17:17.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:17.36 [info     ] CQL_20220420171719: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033916158285754466, 'time_algorithm_update': 0.02274985062448602, 'temp_loss': 4.3355955254961875, 'temp': 0.9690811290727024, 'alpha_loss': -14.66777517363342, 'alpha': 1.0513691564052425, 'critic_loss': 44.10057501765022, 'actor_loss': 6.628978027934917, 'time_step': 0.02318138825265985, 'td_error': 3.0410565704385535, 'init_value': -13.340957641601562, 'ave_value': -9.127105879856122} step=684
2022-04-20 17:17.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:17.44 [info     ] CQL_20220420171719: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033831526661476895, 'time_algorithm_update': 0.02301624295307182, 'temp_loss': 4.041686178647984, 'temp': 0.9490189498279527, 'alpha_loss': -10.516564893443682, 'alpha': 1.078978987813693, 'critic_loss': 39.95447061772932, 'actor_loss': 12.145000184488575, 'time_step': 0.023453749411287365, 'td_error': 4.891120922846839, 'init_value': -22.477832794189453, 'ave_value': -13.850968814961746} step=1026
2022-04-20 17:17.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:17.53 [info     ] CQL_20220420171719: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003542907056752701, 'time_algorithm_update': 0.02398791578080919, 'temp_loss': 3.675574637295907, 'temp': 0.9303128194739247, 'alpha_loss': -8.348228971860562, 'alpha': 1.1036436017493756, 'critic_loss': 41.476969679893806, 'actor_loss': 17.578985242118613, 'time_step': 0.02443894656778079, 'td_error': 6.999485803140626, 'init_value': -29.790851593017578, 'ave_value': -18.184329645617254} step=1368
2022-04-20 17:17.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.02 [info     ] CQL_20220420171719: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003556549897667957, 'time_algorithm_update': 0.02380495740656267, 'temp_loss': 3.369602984852261, 'temp': 0.9126652599426738, 'alpha_loss': -7.024084494127865, 'alpha': 1.1276107266632438, 'critic_loss': 45.39637085987113, 'actor_loss': 22.535665077075624, 'time_step': 0.024260880654318293, 'td_error': 9.692642848829403, 'init_value': -37.3847541809082, 'ave_value': -22.29262029588659} step=1710
2022-04-20 17:18.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.11 [info     ] CQL_20220420171719: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035435902444939867, 'time_algorithm_update': 0.024036744881791677, 'temp_loss': 3.148655022096913, 'temp': 0.8956656478650389, 'alpha_loss': -6.053315737094098, 'alpha': 1.1516433082825956, 'critic_loss': 51.404971897950645, 'actor_loss': 27.171732199819463, 'time_step': 0.0244887628053364, 'td_error': 12.505182085874965, 'init_value': -44.13939666748047, 'ave_value': -26.208013960461695} step=2052
2022-04-20 17:18.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.19 [info     ] CQL_20220420171719: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003312805242705763, 'time_algorithm_update': 0.022131074241727416, 'temp_loss': 2.9213484936987446, 'temp': 0.8792857130368551, 'alpha_loss': -5.139781853608918, 'alpha': 1.1754247922646373, 'critic_loss': 58.190845534118296, 'actor_loss': 31.429803558260378, 'time_step': 0.022552988682574, 'td_error': 15.750835380722105, 'init_value': -50.7769889831543, 'ave_value': -30.06306709308043} step=2394
2022-04-20 17:18.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.27 [info     ] CQL_20220420171719: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003532227019817508, 'time_algorithm_update': 0.023965254164578623, 'temp_loss': 2.728054336637084, 'temp': 0.8633325255056571, 'alpha_loss': -4.323125736755237, 'alpha': 1.1987061782887107, 'critic_loss': 65.99699768824884, 'actor_loss': 35.35663481483682, 'time_step': 0.024414543519940293, 'td_error': 19.075143615451896, 'init_value': -57.07442092895508, 'ave_value': -33.90241627469659} step=2736
2022-04-20 17:18.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.36 [info     ] CQL_20220420171719: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00035504012079963905, 'time_algorithm_update': 0.023814061928910817, 'temp_loss': 2.5720551941129894, 'temp': 0.8478292538757213, 'alpha_loss': -3.603334266888468, 'alpha': 1.221161669457865, 'critic_loss': 74.51829754121123, 'actor_loss': 38.90694197158367, 'time_step': 0.024267571711400798, 'td_error': 22.11664474815382, 'init_value': -61.8132209777832, 'ave_value': -36.48676574213279} step=3078
2022-04-20 17:18.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.45 [info     ] CQL_20220420171719: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00036106890405130667, 'time_algorithm_update': 0.025128988494649965, 'temp_loss': 2.412383270542524, 'temp': 0.832607518859774, 'alpha_loss': -2.9784975886693474, 'alpha': 1.2430131989612914, 'critic_loss': 82.7161997410289, 'actor_loss': 42.177436560915226, 'time_step': 0.02558735936705829, 'td_error': 25.332418710573393, 'init_value': -66.61766052246094, 'ave_value': -38.757644686857994} step=3420
2022-04-20 17:18.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:18.54 [info     ] CQL_20220420171719: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003653004852651853, 'time_algorithm_update': 0.025427611250626415, 'temp_loss': 2.2600751798055327, 'temp': 0.817770430916234, 'alpha_loss': -2.273210135903972, 'alpha': 1.2630534416053727, 'critic_loss': 91.17275874935396, 'actor_loss': 45.1970341665703, 'time_step': 0.02589444040555006, 'td_error': 28.418094530225133, 'init_value': -70.89498138427734, 'ave_value': -41.553775270591984} step=3762
2022-04-20 17:18.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.04 [info     ] CQL_20220420171719: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003616726189328913, 'time_algorithm_update': 0.025250409081665395, 'temp_loss': 2.095199802814171, 'temp': 0.8033298116329818, 'alpha_loss': -1.5779910265462616, 'alpha': 1.2802669012058547, 'critic_loss': 100.1228246186909, 'actor_loss': 47.974769848829126, 'time_step': 0.025708233403880693, 'td_error': 31.60432747155687, 'init_value': -75.72108459472656, 'ave_value': -44.35365175187185} step=4104
2022-04-20 17:19.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.13 [info     ] CQL_20220420171719: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003536946592275162, 'time_algorithm_update': 0.025321720636378952, 'temp_loss': 1.960901589421501, 'temp': 0.7892934514416589, 'alpha_loss': -0.9662468872218235, 'alpha': 1.293738023230904, 'critic_loss': 109.02770969323944, 'actor_loss': 50.42734076962834, 'time_step': 0.02577293895141423, 'td_error': 34.22960350363494, 'init_value': -79.77062225341797, 'ave_value': -46.5396137594633} step=4446
2022-04-20 17:19.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.22 [info     ] CQL_20220420171719: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00036004900235181664, 'time_algorithm_update': 0.025588123421919972, 'temp_loss': 1.807343526193273, 'temp': 0.775736490711134, 'alpha_loss': -0.29603980204236163, 'alpha': 1.3010924775698032, 'critic_loss': 118.03089552316052, 'actor_loss': 52.77034134892693, 'time_step': 0.026048392580266585, 'td_error': 36.534607670414445, 'init_value': -82.04949951171875, 'ave_value': -48.02278366931051} step=4788
2022-04-20 17:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.31 [info     ] CQL_20220420171719: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003586101253130283, 'time_algorithm_update': 0.0250396122012222, 'temp_loss': 1.6968892422335886, 'temp': 0.7623769177330865, 'alpha_loss': 0.2696782317726018, 'alpha': 1.30159420437283, 'critic_loss': 126.96131258958961, 'actor_loss': 54.91102413824427, 'time_step': 0.025495969761184782, 'td_error': 39.133893128696236, 'init_value': -85.76692962646484, 'ave_value': -50.27007457484533} step=5130
2022-04-20 17:19.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.41 [info     ] CQL_20220420171719: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00036424502991793447, 'time_algorithm_update': 0.025494852261236538, 'temp_loss': 1.5539616743723552, 'temp': 0.7494466785798993, 'alpha_loss': 0.8302371776485761, 'alpha': 1.2918558375180116, 'critic_loss': 135.46515265124583, 'actor_loss': 56.823523727773924, 'time_step': 0.025958624499583104, 'td_error': 41.6281593243719, 'init_value': -88.48529815673828, 'ave_value': -51.817624789274355} step=5472
2022-04-20 17:19.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.50 [info     ] CQL_20220420171719: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00035749262536478323, 'time_algorithm_update': 0.025114976174650135, 'temp_loss': 1.4344546055933187, 'temp': 0.7369112586765959, 'alpha_loss': 1.3001137367961655, 'alpha': 1.2723994432834156, 'critic_loss': 143.9209630642718, 'actor_loss': 58.56357788621334, 'time_step': 0.025569671078732138, 'td_error': 44.31216763527321, 'init_value': -90.89739990234375, 'ave_value': -52.95910465271153} step=5814
2022-04-20 17:19.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:19.59 [info     ] CQL_20220420171719: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003602665070204707, 'time_algorithm_update': 0.025356602947614347, 'temp_loss': 1.3427245040037479, 'temp': 0.7246427584809867, 'alpha_loss': 1.7025000557401462, 'alpha': 1.2423517777208697, 'critic_loss': 151.41777109960367, 'actor_loss': 60.16971163164105, 'time_step': 0.02581960834257784, 'td_error': 45.32522146884535, 'init_value': -92.9625015258789, 'ave_value': -54.70358164182184} step=6156
2022-04-20 17:19.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.08 [info     ] CQL_20220420171719: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035355453602751796, 'time_algorithm_update': 0.024921011506465442, 'temp_loss': 1.234848066205867, 'temp': 0.7125512080931524, 'alpha_loss': 2.0250245195797136, 'alpha': 1.2064060140074344, 'critic_loss': 159.6622568766276, 'actor_loss': 61.591512122349435, 'time_step': 0.025372581872326588, 'td_error': 46.9813224975432, 'init_value': -95.1705322265625, 'ave_value': -55.598025555344854} step=6498
2022-04-20 17:20.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.18 [info     ] CQL_20220420171719: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035948850955182346, 'time_algorithm_update': 0.02547002326675326, 'temp_loss': 1.1502034782666213, 'temp': 0.7008094214207945, 'alpha_loss': 2.3071501332482227, 'alpha': 1.1658197895825257, 'critic_loss': 166.5500849450541, 'actor_loss': 62.937652353654826, 'time_step': 0.025927748596459105, 'td_error': 48.26304193909906, 'init_value': -96.7738265991211, 'ave_value': -56.8667853256127} step=6840
2022-04-20 17:20.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.27 [info     ] CQL_20220420171719: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035635421150609066, 'time_algorithm_update': 0.025066871392099482, 'temp_loss': 1.072395946895867, 'temp': 0.6892367238886872, 'alpha_loss': 2.5464244711126693, 'alpha': 1.1240968665881463, 'critic_loss': 173.09248798214205, 'actor_loss': 64.08996680187202, 'time_step': 0.02551771534813775, 'td_error': 49.100757675569156, 'init_value': -98.27841186523438, 'ave_value': -57.533553068225714} step=7182
2022-04-20 17:20.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.36 [info     ] CQL_20220420171719: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.000361574323553788, 'time_algorithm_update': 0.02549269255141766, 'temp_loss': 0.9946677954921945, 'temp': 0.6778181486311015, 'alpha_loss': 2.7362769493582655, 'alpha': 1.0832544188750417, 'critic_loss': 179.8482427764357, 'actor_loss': 65.2081820727789, 'time_step': 0.025954265343515498, 'td_error': 49.99550500991494, 'init_value': -99.76205444335938, 'ave_value': -58.4434573195821} step=7524
2022-04-20 17:20.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.45 [info     ] CQL_20220420171719: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003602323476334064, 'time_algorithm_update': 0.024974771410401105, 'temp_loss': 0.9253186203582943, 'temp': 0.6666056722925421, 'alpha_loss': 2.932857630284209, 'alpha': 1.0434832691449172, 'critic_loss': 185.96110467743455, 'actor_loss': 66.23315273530302, 'time_step': 0.02543515908090692, 'td_error': 50.78928075586579, 'init_value': -101.01808166503906, 'ave_value': -60.125845628110945} step=7866
2022-04-20 17:20.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:20.54 [info     ] CQL_20220420171719: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035413454847726207, 'time_algorithm_update': 0.025385154618157282, 'temp_loss': 0.8507812948603379, 'temp': 0.6557481651417694, 'alpha_loss': 3.044850072839804, 'alpha': 1.0048092880792785, 'critic_loss': 192.52883161578262, 'actor_loss': 67.16186196622793, 'time_step': 0.025838771758720888, 'td_error': 51.26194282889634, 'init_value': -102.14643859863281, 'ave_value': -60.56207555529368} step=8208
2022-04-20 17:20.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.04 [info     ] CQL_20220420171719: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00035652500844141196, 'time_algorithm_update': 0.025139059936791136, 'temp_loss': 0.7896482084403958, 'temp': 0.6451216014156564, 'alpha_loss': 3.1580402263772416, 'alpha': 0.9683084353717447, 'critic_loss': 198.6090791490343, 'actor_loss': 68.04413236651504, 'time_step': 0.025592683351527878, 'td_error': 51.132410617848116, 'init_value': -103.03950500488281, 'ave_value': -60.92981978372008} step=8550
2022-04-20 17:21.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.13 [info     ] CQL_20220420171719: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00036170120127716954, 'time_algorithm_update': 0.02553551587444997, 'temp_loss': 0.7088489321588772, 'temp': 0.6346666016425305, 'alpha_loss': 3.2130679620636835, 'alpha': 0.9336611760987176, 'critic_loss': 204.61104391889964, 'actor_loss': 68.72963941027547, 'time_step': 0.025997182082014476, 'td_error': 51.485045857489844, 'init_value': -103.1146469116211, 'ave_value': -61.707353482008784} step=8892
2022-04-20 17:21.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.22 [info     ] CQL_20220420171719: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003598795996771918, 'time_algorithm_update': 0.025061220453496565, 'temp_loss': 0.6393106158888131, 'temp': 0.6247758807843191, 'alpha_loss': 3.278357387111898, 'alpha': 0.9008797515205472, 'critic_loss': 210.00415030139231, 'actor_loss': 69.48711625037835, 'time_step': 0.025520853131835223, 'td_error': 52.121146779112884, 'init_value': -103.91534423828125, 'ave_value': -62.227448214999995} step=9234
2022-04-20 17:21.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.31 [info     ] CQL_20220420171719: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035952127467819123, 'time_algorithm_update': 0.025422504073695132, 'temp_loss': 0.5769709809220325, 'temp': 0.6152959642005943, 'alpha_loss': 3.296946183108447, 'alpha': 0.8696204233936399, 'critic_loss': 215.89505575971995, 'actor_loss': 70.09395906102587, 'time_step': 0.025883536589773076, 'td_error': 52.19039961838633, 'init_value': -104.49528503417969, 'ave_value': -62.77549877642645} step=9576
2022-04-20 17:21.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.40 [info     ] CQL_20220420171719: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00035953103450306676, 'time_algorithm_update': 0.024884092180352462, 'temp_loss': 0.5201812965784505, 'temp': 0.6060794644188463, 'alpha_loss': 3.2934344450981294, 'alpha': 0.8399184109174718, 'critic_loss': 220.52318314223263, 'actor_loss': 70.70519588983547, 'time_step': 0.025342273433306063, 'td_error': 52.38468174228401, 'init_value': -104.6430892944336, 'ave_value': -64.31036159133589} step=9918
2022-04-20 17:21.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.49 [info     ] CQL_20220420171719: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003616461279796578, 'time_algorithm_update': 0.024001703624836883, 'temp_loss': 0.48303549449171934, 'temp': 0.5969259859177104, 'alpha_loss': 3.3296524014389304, 'alpha': 0.811507301895242, 'critic_loss': 225.47339335659095, 'actor_loss': 71.16949516430236, 'time_step': 0.024463457670825268, 'td_error': 52.465467037272475, 'init_value': -105.21134185791016, 'ave_value': -64.03280294395513} step=10260
2022-04-20 17:21.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:21.58 [info     ] CQL_20220420171719: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035587319156579804, 'time_algorithm_update': 0.02379326095357973, 'temp_loss': 0.4274608445468179, 'temp': 0.5881451291647571, 'alpha_loss': 3.328589497951039, 'alpha': 0.7842188479259, 'critic_loss': 228.97479270355046, 'actor_loss': 71.69590393423337, 'time_step': 0.024248082735385115, 'td_error': 52.3961038643497, 'init_value': -105.0047836303711, 'ave_value': -64.71883423170796} step=10602
2022-04-20 17:21.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.07 [info     ] CQL_20220420171719: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.000352961278101157, 'time_algorithm_update': 0.023683511722854704, 'temp_loss': 0.38607692472331706, 'temp': 0.5797897145064951, 'alpha_loss': 3.338001890140667, 'alpha': 0.7578219882926048, 'critic_loss': 232.2991917035733, 'actor_loss': 72.07884292156376, 'time_step': 0.02413541322563127, 'td_error': 52.53065181455506, 'init_value': -105.34782409667969, 'ave_value': -65.30782672999007} step=10944
2022-04-20 17:22.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.15 [info     ] CQL_20220420171719: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003588115959836726, 'time_algorithm_update': 0.023829994145889728, 'temp_loss': 0.36154967922804476, 'temp': 0.5714688471883361, 'alpha_loss': 3.2612071237717455, 'alpha': 0.7331390820051494, 'critic_loss': 234.43323088528817, 'actor_loss': 72.47489411649649, 'time_step': 0.024287953014262238, 'td_error': 52.07658504260899, 'init_value': -105.95345306396484, 'ave_value': -65.05663246637396} step=11286
2022-04-20 17:22.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.24 [info     ] CQL_20220420171719: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035381456564741523, 'time_algorithm_update': 0.023804509151748747, 'temp_loss': 0.30895770470780587, 'temp': 0.5631875052438144, 'alpha_loss': 3.255981717541901, 'alpha': 0.7093777468329981, 'critic_loss': 236.81429214923702, 'actor_loss': 72.83243790565179, 'time_step': 0.024255130723205922, 'td_error': 52.184000176805185, 'init_value': -105.54377746582031, 'ave_value': -65.89303514674977} step=11628
2022-04-20 17:22.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.33 [info     ] CQL_20220420171719: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035581602687724154, 'time_algorithm_update': 0.023675586745055797, 'temp_loss': 0.29175164992784897, 'temp': 0.5555004691868498, 'alpha_loss': 3.2096307417105514, 'alpha': 0.686193394207815, 'critic_loss': 238.68767975924308, 'actor_loss': 73.1318135958666, 'time_step': 0.024129231073702986, 'td_error': 52.38183502697346, 'init_value': -106.69913482666016, 'ave_value': -66.47781196318365} step=11970
2022-04-20 17:22.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.41 [info     ] CQL_20220420171719: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003572869719120494, 'time_algorithm_update': 0.02399387624528673, 'temp_loss': 0.27856790387120683, 'temp': 0.5475981369701742, 'alpha_loss': 3.1730248670131838, 'alpha': 0.6641221616351813, 'critic_loss': 239.72255171391, 'actor_loss': 73.51875949881928, 'time_step': 0.024450541936863236, 'td_error': 51.46092860991154, 'init_value': -105.15013122558594, 'ave_value': -65.80802427106076} step=12312
2022-04-20 17:22.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.50 [info     ] CQL_20220420171719: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035377970913000274, 'time_algorithm_update': 0.02387121546338176, 'temp_loss': 0.23793672578006286, 'temp': 0.5398355893224304, 'alpha_loss': 3.138169903155656, 'alpha': 0.6430237620894672, 'critic_loss': 241.1016710515608, 'actor_loss': 73.82290091709784, 'time_step': 0.024324433148255824, 'td_error': 51.92123051505945, 'init_value': -105.4656753540039, 'ave_value': -66.69568136462085} step=12654
2022-04-20 17:22.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:22.59 [info     ] CQL_20220420171719: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003525799478006642, 'time_algorithm_update': 0.02410670679215102, 'temp_loss': 0.23158579434088447, 'temp': 0.5324210048767558, 'alpha_loss': 3.046230685989759, 'alpha': 0.6225234435664283, 'critic_loss': 241.50277330722028, 'actor_loss': 74.06390733328479, 'time_step': 0.02455792092440421, 'td_error': 50.963230630484595, 'init_value': -105.75865173339844, 'ave_value': -66.45500656563554} step=12996
2022-04-20 17:22.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.08 [info     ] CQL_20220420171719: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035256182241160967, 'time_algorithm_update': 0.023919260989852816, 'temp_loss': 0.19175748176578628, 'temp': 0.5252653830929807, 'alpha_loss': 3.0170024483524567, 'alpha': 0.6028696871988954, 'critic_loss': 241.6220772726494, 'actor_loss': 74.24001824228387, 'time_step': 0.024370687049731873, 'td_error': 51.864944062896875, 'init_value': -105.373779296875, 'ave_value': -67.19866712559585} step=13338
2022-04-20 17:23.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.16 [info     ] CQL_20220420171719: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003532959006683171, 'time_algorithm_update': 0.023965513497068172, 'temp_loss': 0.17591669939850507, 'temp': 0.5182152066314429, 'alpha_loss': 2.950796464033294, 'alpha': 0.5839287764147708, 'critic_loss': 241.37871935214216, 'actor_loss': 74.46004216155113, 'time_step': 0.024420343644437733, 'td_error': 50.621578801130205, 'init_value': -106.03977966308594, 'ave_value': -67.22306840162288} step=13680
2022-04-20 17:23.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.25 [info     ] CQL_20220420171719: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035954218858863876, 'time_algorithm_update': 0.023856241103501347, 'temp_loss': 0.17034450024965475, 'temp': 0.5116737810840384, 'alpha_loss': 2.8579844361857365, 'alpha': 0.5656681989717205, 'critic_loss': 241.30444224396643, 'actor_loss': 74.63413691381265, 'time_step': 0.024310328109919677, 'td_error': 50.10678905026515, 'init_value': -103.93314361572266, 'ave_value': -66.91365562174637} step=14022
2022-04-20 17:23.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.34 [info     ] CQL_20220420171719: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003560537483259948, 'time_algorithm_update': 0.023962359679372686, 'temp_loss': 0.1590922129373148, 'temp': 0.5049600719708448, 'alpha_loss': 2.7898432469507406, 'alpha': 0.5481661692348837, 'critic_loss': 240.1876868532415, 'actor_loss': 74.82894633109109, 'time_step': 0.024416545678300466, 'td_error': 50.21942478184233, 'init_value': -106.524169921875, 'ave_value': -69.01130310844329} step=14364
2022-04-20 17:23.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.42 [info     ] CQL_20220420171719: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003575853437011005, 'time_algorithm_update': 0.02398377970645302, 'temp_loss': 0.14161180225554962, 'temp': 0.49834476672766503, 'alpha_loss': 2.7249582591112596, 'alpha': 0.5312988338066124, 'critic_loss': 240.91949677049067, 'actor_loss': 75.01211381655688, 'time_step': 0.0244402571728355, 'td_error': 49.27197862019385, 'init_value': -105.59928894042969, 'ave_value': -67.19457246106622} step=14706
2022-04-20 17:23.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:23.51 [info     ] CQL_20220420171719: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003511396765011793, 'time_algorithm_update': 0.02393690047905459, 'temp_loss': 0.12317453473751917, 'temp': 0.4919929080887845, 'alpha_loss': 2.6435312057099147, 'alpha': 0.5149609472667962, 'critic_loss': 240.1542511878655, 'actor_loss': 75.17011737265783, 'time_step': 0.024387257140979432, 'td_error': 49.54821239986844, 'init_value': -105.13714599609375, 'ave_value': -68.02861422186633} step=15048
2022-04-20 17:23.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.00 [info     ] CQL_20220420171719: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00035390379833199126, 'time_algorithm_update': 0.023966608688845273, 'temp_loss': 0.12939395673834442, 'temp': 0.48593020325864267, 'alpha_loss': 2.54358154901287, 'alpha': 0.4992209911695001, 'critic_loss': 239.69163937038846, 'actor_loss': 75.36122295312714, 'time_step': 0.024420250926101415, 'td_error': 49.0926723834102, 'init_value': -105.0414047241211, 'ave_value': -68.05043705245531} step=15390
2022-04-20 17:24.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.09 [info     ] CQL_20220420171719: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003534778516892104, 'time_algorithm_update': 0.023923981956571166, 'temp_loss': 0.10035232737202916, 'temp': 0.4798519985194792, 'alpha_loss': 2.5107749006901567, 'alpha': 0.4840837806811807, 'critic_loss': 238.86295050905463, 'actor_loss': 75.46982147261413, 'time_step': 0.024375139621266147, 'td_error': 48.75058216529148, 'init_value': -104.24627685546875, 'ave_value': -67.93395919752147} step=15732
2022-04-20 17:24.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.18 [info     ] CQL_20220420171719: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035955961684734503, 'time_algorithm_update': 0.025235803503739208, 'temp_loss': 0.08711113642944752, 'temp': 0.4750297321387899, 'alpha_loss': 2.422426839495263, 'alpha': 0.46922731538962204, 'critic_loss': 237.9472261841534, 'actor_loss': 75.49216970923351, 'time_step': 0.025693666865254008, 'td_error': 47.753050990842766, 'init_value': -103.16314697265625, 'ave_value': -67.26138394783477} step=16074
2022-04-20 17:24.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.27 [info     ] CQL_20220420171719: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00035912321324934037, 'time_algorithm_update': 0.025510095713431376, 'temp_loss': 0.09113756040034936, 'temp': 0.46993762272143225, 'alpha_loss': 2.340084697070875, 'alpha': 0.45512846905237053, 'critic_loss': 237.3160759998344, 'actor_loss': 75.54706223248041, 'time_step': 0.025969236914874516, 'td_error': 47.688366101159914, 'init_value': -105.1919174194336, 'ave_value': -68.41351654799806} step=16416
2022-04-20 17:24.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.36 [info     ] CQL_20220420171719: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003628131241826286, 'time_algorithm_update': 0.02531807115900586, 'temp_loss': 0.07934469967938916, 'temp': 0.46451098319382694, 'alpha_loss': 2.29133259134683, 'alpha': 0.44134438970284157, 'critic_loss': 236.34938303629556, 'actor_loss': 75.69183775695444, 'time_step': 0.02578237391354745, 'td_error': 46.910615005564786, 'init_value': -104.0980453491211, 'ave_value': -68.24091941774697} step=16758
2022-04-20 17:24.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:24.46 [info     ] CQL_20220420171719: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003544684739140739, 'time_algorithm_update': 0.02529402434477332, 'temp_loss': 0.08054080243870529, 'temp': 0.45983910761032887, 'alpha_loss': 2.1892146056855633, 'alpha': 0.4281269249163176, 'critic_loss': 235.73487407840483, 'actor_loss': 75.71376888654386, 'time_step': 0.025747862475657322, 'td_error': 47.098880224877156, 'init_value': -104.6261215209961, 'ave_value': -69.20440825212512} step=17100
2022-04-20 17:24.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420171719/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:24.47 [info     ] FQE_20220420172446: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015444066151078925, 'time_algorithm_update': 0.003569032772477851, 'loss': 0.007928850660829928, 'time_step': 0.003801136131746223, 'init_value': -0.3683009147644043, 'ave_value': -0.3534986696063398, 'soft_opc': nan} step=166




2022-04-20 17:24.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.47 [info     ] FQE_20220420172446: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015853686505053416, 'time_algorithm_update': 0.0044451679091855704, 'loss': 0.006579863782747683, 'time_step': 0.004673195172505206, 'init_value': -0.5174131989479065, 'ave_value': -0.44009622270996507, 'soft_opc': nan} step=332




2022-04-20 17:24.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.48 [info     ] FQE_20220420172446: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001587824649121388, 'time_algorithm_update': 0.0044913090855242255, 'loss': 0.006075325981624065, 'time_step': 0.004724593047636101, 'init_value': -0.5907434225082397, 'ave_value': -0.4898210965969541, 'soft_opc': nan} step=498




2022-04-20 17:24.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.49 [info     ] FQE_20220420172446: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015739647738904837, 'time_algorithm_update': 0.00424729485109628, 'loss': 0.006350228453548737, 'time_step': 0.004473628768001695, 'init_value': -0.6826414465904236, 'ave_value': -0.5333951960779256, 'soft_opc': nan} step=664




2022-04-20 17:24.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.50 [info     ] FQE_20220420172446: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015773112515369094, 'time_algorithm_update': 0.003966625914516219, 'loss': 0.005956374017622726, 'time_step': 0.0041938761630690245, 'init_value': -0.7478091716766357, 'ave_value': -0.565081201634697, 'soft_opc': nan} step=830




2022-04-20 17:24.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.51 [info     ] FQE_20220420172446: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001648923000657415, 'time_algorithm_update': 0.004449100379484245, 'loss': 0.005755176174409508, 'time_step': 0.004689028464167951, 'init_value': -0.7818694114685059, 'ave_value': -0.5623309184100714, 'soft_opc': nan} step=996




2022-04-20 17:24.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.52 [info     ] FQE_20220420172446: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016027617167277508, 'time_algorithm_update': 0.004446991954941347, 'loss': 0.005741711325820042, 'time_step': 0.004679365330431835, 'init_value': -0.8210053443908691, 'ave_value': -0.5885021066007851, 'soft_opc': nan} step=1162




2022-04-20 17:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.52 [info     ] FQE_20220420172446: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001551516084785921, 'time_algorithm_update': 0.0035706586148365436, 'loss': 0.005577145973276572, 'time_step': 0.0037944302501448667, 'init_value': -0.9017906188964844, 'ave_value': -0.6321514618390047, 'soft_opc': nan} step=1328




2022-04-20 17:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.53 [info     ] FQE_20220420172446: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001612743699407003, 'time_algorithm_update': 0.0044626686946455255, 'loss': 0.005216821302317576, 'time_step': 0.004698494830763483, 'init_value': -0.9934642910957336, 'ave_value': -0.6967768276865418, 'soft_opc': nan} step=1494




2022-04-20 17:24.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.54 [info     ] FQE_20220420172446: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001619910619345056, 'time_algorithm_update': 0.0044835633542164265, 'loss': 0.005375156331953245, 'time_step': 0.004721352852970721, 'init_value': -1.1084412336349487, 'ave_value': -0.7735420993252381, 'soft_opc': nan} step=1660




2022-04-20 17:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.55 [info     ] FQE_20220420172446: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016142517687326455, 'time_algorithm_update': 0.004398712192673281, 'loss': 0.005209087354203978, 'time_step': 0.004633356289691235, 'init_value': -1.1974177360534668, 'ave_value': -0.8423295923919828, 'soft_opc': nan} step=1826




2022-04-20 17:24.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.56 [info     ] FQE_20220420172446: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015947043177593187, 'time_algorithm_update': 0.0040907472012990925, 'loss': 0.0051267516563635275, 'time_step': 0.004323726677032839, 'init_value': -1.2822074890136719, 'ave_value': -0.9104289656756698, 'soft_opc': nan} step=1992




2022-04-20 17:24.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.57 [info     ] FQE_20220420172446: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016229123954313347, 'time_algorithm_update': 0.004442719091852027, 'loss': 0.005449599363414727, 'time_step': 0.004677139132855886, 'init_value': -1.4119079113006592, 'ave_value': -0.9886863520822009, 'soft_opc': nan} step=2158




2022-04-20 17:24.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.57 [info     ] FQE_20220420172446: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016046432127435524, 'time_algorithm_update': 0.004466395780264613, 'loss': 0.0055821564298190445, 'time_step': 0.004696040268403938, 'init_value': -1.5628083944320679, 'ave_value': -1.1007934399486126, 'soft_opc': nan} step=2324




2022-04-20 17:24.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.58 [info     ] FQE_20220420172446: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015655339482318923, 'time_algorithm_update': 0.0036186468170349858, 'loss': 0.005446511410535818, 'time_step': 0.0038466166300945973, 'init_value': -1.5745335817337036, 'ave_value': -1.1147908306605108, 'soft_opc': nan} step=2490




2022-04-20 17:24.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:24.59 [info     ] FQE_20220420172446: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016177562345941383, 'time_algorithm_update': 0.004453363188778062, 'loss': 0.00633355664266049, 'time_step': 0.00468902559165495, 'init_value': -1.7297818660736084, 'ave_value': -1.217823390286785, 'soft_opc': nan} step=2656




2022-04-20 17:24.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.00 [info     ] FQE_20220420172446: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016173684453389732, 'time_algorithm_update': 0.004519877663577895, 'loss': 0.006538568201323365, 'time_step': 0.004750345126692071, 'init_value': -1.8650494813919067, 'ave_value': -1.3181379689021153, 'soft_opc': nan} step=2822




2022-04-20 17:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.01 [info     ] FQE_20220420172446: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016194797423948725, 'time_algorithm_update': 0.004272887505680682, 'loss': 0.007205275038863162, 'time_step': 0.004510152770812253, 'init_value': -1.9062342643737793, 'ave_value': -1.3586902665930825, 'soft_opc': nan} step=2988




2022-04-20 17:25.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.01 [info     ] FQE_20220420172446: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.000157642077250653, 'time_algorithm_update': 0.004066171416317124, 'loss': 0.00806990182952933, 'time_step': 0.004294547689966409, 'init_value': -2.1067464351654053, 'ave_value': -1.5074158713892774, 'soft_opc': nan} step=3154




2022-04-20 17:25.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.02 [info     ] FQE_20220420172446: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016374329486525203, 'time_algorithm_update': 0.004502523376281003, 'loss': 0.00874105010725872, 'time_step': 0.0047388593834566785, 'init_value': -2.2583658695220947, 'ave_value': -1.6053617865921141, 'soft_opc': nan} step=3320




2022-04-20 17:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.03 [info     ] FQE_20220420172446: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016005355191518026, 'time_algorithm_update': 0.004425991012389402, 'loss': 0.009428355430466044, 'time_step': 0.004660319132977222, 'init_value': -2.3169472217559814, 'ave_value': -1.6496080606236114, 'soft_opc': nan} step=3486




2022-04-20 17:25.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.04 [info     ] FQE_20220420172446: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015931244356086455, 'time_algorithm_update': 0.0035823770316250354, 'loss': 0.009682246089032122, 'time_step': 0.003811748631029244, 'init_value': -2.5617880821228027, 'ave_value': -1.812994595704315, 'soft_opc': nan} step=3652




2022-04-20 17:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.05 [info     ] FQE_20220420172446: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015540151710969857, 'time_algorithm_update': 0.004455587950097509, 'loss': 0.011124379200024912, 'time_step': 0.004676495689943612, 'init_value': -2.7247440814971924, 'ave_value': -1.9297047614917024, 'soft_opc': nan} step=3818




2022-04-20 17:25.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.06 [info     ] FQE_20220420172446: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016018281500023533, 'time_algorithm_update': 0.004506102527480528, 'loss': 0.011515719123207122, 'time_step': 0.004740673375416951, 'init_value': -2.8069870471954346, 'ave_value': -1.9431239666031288, 'soft_opc': nan} step=3984




2022-04-20 17:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.06 [info     ] FQE_20220420172446: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015544747731771814, 'time_algorithm_update': 0.004238457564848015, 'loss': 0.012961051055841732, 'time_step': 0.004463444273155856, 'init_value': -3.0153980255126953, 'ave_value': -2.1061642839833423, 'soft_opc': nan} step=4150




2022-04-20 17:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.07 [info     ] FQE_20220420172446: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015449954802731434, 'time_algorithm_update': 0.003967249249837485, 'loss': 0.013915747897110385, 'time_step': 0.004190658948507653, 'init_value': -3.245509147644043, 'ave_value': -2.265846464601723, 'soft_opc': nan} step=4316




2022-04-20 17:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.08 [info     ] FQE_20220420172446: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016139932425625353, 'time_algorithm_update': 0.004478447408561247, 'loss': 0.015005114207784814, 'time_step': 0.004710780568869717, 'init_value': -3.3411865234375, 'ave_value': -2.3317784573178035, 'soft_opc': nan} step=4482




2022-04-20 17:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.09 [info     ] FQE_20220420172446: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016078460647399166, 'time_algorithm_update': 0.004491665277136378, 'loss': 0.016386022127138068, 'time_step': 0.004728139164936112, 'init_value': -3.5135483741760254, 'ave_value': -2.429862930565267, 'soft_opc': nan} step=4648




2022-04-20 17:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.09 [info     ] FQE_20220420172446: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015705177582890154, 'time_algorithm_update': 0.0035256319735423626, 'loss': 0.017331827810617637, 'time_step': 0.003753636256757989, 'init_value': -3.653447151184082, 'ave_value': -2.5163488430482848, 'soft_opc': nan} step=4814




2022-04-20 17:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.10 [info     ] FQE_20220420172446: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015937707510339208, 'time_algorithm_update': 0.004471817648554423, 'loss': 0.01855337440179302, 'time_step': 0.004702793546469815, 'init_value': -3.724755048751831, 'ave_value': -2.5798523377056592, 'soft_opc': nan} step=4980




2022-04-20 17:25.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.11 [info     ] FQE_20220420172446: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015994152390813254, 'time_algorithm_update': 0.004463701363069466, 'loss': 0.01967869443448642, 'time_step': 0.004696307412113051, 'init_value': -3.9266738891601562, 'ave_value': -2.6813961901643255, 'soft_opc': nan} step=5146




2022-04-20 17:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.12 [info     ] FQE_20220420172446: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016392713569733034, 'time_algorithm_update': 0.0042104577443685875, 'loss': 0.02084541474244591, 'time_step': 0.004449363214423858, 'init_value': -4.03306245803833, 'ave_value': -2.7357459356387457, 'soft_opc': nan} step=5312




2022-04-20 17:25.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.13 [info     ] FQE_20220420172446: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016305389174495834, 'time_algorithm_update': 0.0040518907179315404, 'loss': 0.022185964737065047, 'time_step': 0.00428666407803455, 'init_value': -3.9928648471832275, 'ave_value': -2.668075907713658, 'soft_opc': nan} step=5478




2022-04-20 17:25.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.14 [info     ] FQE_20220420172446: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016086360058152532, 'time_algorithm_update': 0.004461445004107004, 'loss': 0.02271943014530263, 'time_step': 0.004693054291139166, 'init_value': -4.046876907348633, 'ave_value': -2.6774195395477185, 'soft_opc': nan} step=5644




2022-04-20 17:25.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.14 [info     ] FQE_20220420172446: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016077886144798923, 'time_algorithm_update': 0.004488186663891895, 'loss': 0.023370591818399906, 'time_step': 0.004717920199934259, 'init_value': -4.162146091461182, 'ave_value': -2.722404784366891, 'soft_opc': nan} step=5810




2022-04-20 17:25.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.15 [info     ] FQE_20220420172446: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015774979648819888, 'time_algorithm_update': 0.0035549474049763508, 'loss': 0.02500913157450394, 'time_step': 0.0037835003381752105, 'init_value': -4.26640510559082, 'ave_value': -2.791933431066908, 'soft_opc': nan} step=5976




2022-04-20 17:25.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.16 [info     ] FQE_20220420172446: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016148549964629025, 'time_algorithm_update': 0.004473305610289057, 'loss': 0.026117052427066648, 'time_step': 0.004706180239298257, 'init_value': -4.444155693054199, 'ave_value': -2.9006385319673265, 'soft_opc': nan} step=6142




2022-04-20 17:25.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.17 [info     ] FQE_20220420172446: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016029627926378366, 'time_algorithm_update': 0.004529851028718144, 'loss': 0.027662157018652118, 'time_step': 0.004765167293778385, 'init_value': -4.576366424560547, 'ave_value': -2.977534471304567, 'soft_opc': nan} step=6308




2022-04-20 17:25.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.18 [info     ] FQE_20220420172446: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015853399253753294, 'time_algorithm_update': 0.004240027393203184, 'loss': 0.028446753916095, 'time_step': 0.004474704524120653, 'init_value': -4.677883625030518, 'ave_value': -3.023095830831979, 'soft_opc': nan} step=6474




2022-04-20 17:25.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.19 [info     ] FQE_20220420172446: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001590137022087373, 'time_algorithm_update': 0.004020672246634242, 'loss': 0.0305422341222426, 'time_step': 0.0042521695056593565, 'init_value': -4.857265472412109, 'ave_value': -3.192362590402633, 'soft_opc': nan} step=6640




2022-04-20 17:25.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.19 [info     ] FQE_20220420172446: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015976342810205667, 'time_algorithm_update': 0.004456194050340767, 'loss': 0.03162356879259179, 'time_step': 0.004687421293143767, 'init_value': -4.9960832595825195, 'ave_value': -3.3134559147261284, 'soft_opc': nan} step=6806




2022-04-20 17:25.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.20 [info     ] FQE_20220420172446: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001628413257828678, 'time_algorithm_update': 0.00449225845107113, 'loss': 0.03271768174235182, 'time_step': 0.004729394453117646, 'init_value': -5.053980827331543, 'ave_value': -3.286104545963777, 'soft_opc': nan} step=6972




2022-04-20 17:25.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.21 [info     ] FQE_20220420172446: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016069412231445312, 'time_algorithm_update': 0.0036125197468033754, 'loss': 0.033303032826957665, 'time_step': 0.0038493455174457595, 'init_value': -5.144338607788086, 'ave_value': -3.3220244007873108, 'soft_opc': nan} step=7138




2022-04-20 17:25.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.22 [info     ] FQE_20220420172446: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016489948134824453, 'time_algorithm_update': 0.004621396581810641, 'loss': 0.03528327555311507, 'time_step': 0.004861917840429099, 'init_value': -5.387595176696777, 'ave_value': -3.5490843155094094, 'soft_opc': nan} step=7304




2022-04-20 17:25.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.23 [info     ] FQE_20220420172446: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001593426049473774, 'time_algorithm_update': 0.0044570730393191415, 'loss': 0.03638679190350876, 'time_step': 0.004688209797962603, 'init_value': -5.448028564453125, 'ave_value': -3.5706005031729604, 'soft_opc': nan} step=7470




2022-04-20 17:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.23 [info     ] FQE_20220420172446: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016056485922939806, 'time_algorithm_update': 0.00414224417812853, 'loss': 0.037501509298559235, 'time_step': 0.004376705870570907, 'init_value': -5.543101787567139, 'ave_value': -3.6227715012189505, 'soft_opc': nan} step=7636




2022-04-20 17:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.24 [info     ] FQE_20220420172446: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016112930803413852, 'time_algorithm_update': 0.004182888800839344, 'loss': 0.039171432118290606, 'time_step': 0.0044174682663147705, 'init_value': -5.606414794921875, 'ave_value': -3.6476177945867314, 'soft_opc': nan} step=7802




2022-04-20 17:25.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.25 [info     ] FQE_20220420172446: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016554292426051865, 'time_algorithm_update': 0.004472581737012748, 'loss': 0.040251726602310745, 'time_step': 0.004710898341902767, 'init_value': -5.759537220001221, 'ave_value': -3.777240446371001, 'soft_opc': nan} step=7968




2022-04-20 17:25.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.26 [info     ] FQE_20220420172446: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.000159003648413233, 'time_algorithm_update': 0.0044655067374907344, 'loss': 0.04110393172034612, 'time_step': 0.004696952291281827, 'init_value': -5.791072845458984, 'ave_value': -3.7535273521064636, 'soft_opc': nan} step=8134




2022-04-20 17:25.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:25.27 [info     ] FQE_20220420172446: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015826397631541793, 'time_algorithm_update': 0.003556926566434194, 'loss': 0.042134574246985544, 'time_step': 0.003786770694227104, 'init_value': -5.764736175537109, 'ave_value': -3.756388104149887, 'soft_opc': nan} step=8300




2022-04-20 17:25.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172446/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 17:25.27 [info     ] Directory is created at d3rlpy_logs/FQE_20220420172527
2022-04-20 17:25.27 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:25.27 [debug    ] Building models...
2022-04-20 17:25.27 [debug    ] Models have been built.
2022-04-20 17:25.27 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420172527/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:25.29 [info     ] FQE_20220420172527: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015824686649233797, 'time_algorithm_update': 0.004435456769410954, 'loss': 0.02526376058271718, 'time_step': 0.004666311796321426, 'init_value': -0.9816912412643433, 'ave_value': -1.003004658349731, 'soft_opc': nan} step=344




2022-04-20 17:25.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.30 [info     ] FQE_20220420172527: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016014035357985387, 'time_algorithm_update': 0.004013022711110669, 'loss': 0.023541200299595676, 'time_step': 0.004244232593580734, 'init_value': -1.7712470293045044, 'ave_value': -1.811628777741849, 'soft_opc': nan} step=688




2022-04-20 17:25.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.32 [info     ] FQE_20220420172527: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016091174857560977, 'time_algorithm_update': 0.004319096027418624, 'loss': 0.026243539735523246, 'time_step': 0.004556689844575039, 'init_value': -2.705209255218506, 'ave_value': -2.7787374658090576, 'soft_opc': nan} step=1032




2022-04-20 17:25.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.34 [info     ] FQE_20220420172527: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015984579574230106, 'time_algorithm_update': 0.004330810419348783, 'loss': 0.028759848681105257, 'time_step': 0.0045652306357095405, 'init_value': -3.4339075088500977, 'ave_value': -3.50878841487674, 'soft_opc': nan} step=1376




2022-04-20 17:25.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.35 [info     ] FQE_20220420172527: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016260840171991394, 'time_algorithm_update': 0.004027987635412881, 'loss': 0.03401813044775884, 'time_step': 0.004265434520189152, 'init_value': -4.353689193725586, 'ave_value': -4.475214230396726, 'soft_opc': nan} step=1720




2022-04-20 17:25.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.37 [info     ] FQE_20220420172527: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001651318960411604, 'time_algorithm_update': 0.004490485718083936, 'loss': 0.038903331646674075, 'time_step': 0.004729954309241716, 'init_value': -4.8313493728637695, 'ave_value': -4.987169824097608, 'soft_opc': nan} step=2064




2022-04-20 17:25.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.39 [info     ] FQE_20220420172527: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016265483789665755, 'time_algorithm_update': 0.004052644552186478, 'loss': 0.045291763559785175, 'time_step': 0.004287142393200896, 'init_value': -5.654010772705078, 'ave_value': -5.876655254653982, 'soft_opc': nan} step=2408




2022-04-20 17:25.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.40 [info     ] FQE_20220420172527: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016377069229303406, 'time_algorithm_update': 0.004409998655319214, 'loss': 0.053572957921058456, 'time_step': 0.0046473388061967005, 'init_value': -5.966207981109619, 'ave_value': -6.279475919056583, 'soft_opc': nan} step=2752




2022-04-20 17:25.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.42 [info     ] FQE_20220420172527: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001590002414792083, 'time_algorithm_update': 0.004145557797232339, 'loss': 0.06159401704492264, 'time_step': 0.004377503727757653, 'init_value': -6.279799461364746, 'ave_value': -6.733304026803455, 'soft_opc': nan} step=3096




2022-04-20 17:25.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.44 [info     ] FQE_20220420172527: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001635003921597503, 'time_algorithm_update': 0.0041456277980360876, 'loss': 0.07197532614119091, 'time_step': 0.004385801941849465, 'init_value': -6.771953582763672, 'ave_value': -7.409761569789938, 'soft_opc': nan} step=3440




2022-04-20 17:25.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.45 [info     ] FQE_20220420172527: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016468486120534498, 'time_algorithm_update': 0.0045030047727185624, 'loss': 0.08173767410132081, 'time_step': 0.004743759022202603, 'init_value': -7.177839756011963, 'ave_value': -8.010470894005921, 'soft_opc': nan} step=3784




2022-04-20 17:25.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.47 [info     ] FQE_20220420172527: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001640798047531483, 'time_algorithm_update': 0.003993667142335759, 'loss': 0.0937691149626707, 'time_step': 0.004233594550642856, 'init_value': -7.558128356933594, 'ave_value': -8.661363038769714, 'soft_opc': nan} step=4128




2022-04-20 17:25.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.49 [info     ] FQE_20220420172527: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016171987666640172, 'time_algorithm_update': 0.004462102124857348, 'loss': 0.1056982857489222, 'time_step': 0.004697304825450099, 'init_value': -7.779842376708984, 'ave_value': -9.22404895167093, 'soft_opc': nan} step=4472




2022-04-20 17:25.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.50 [info     ] FQE_20220420172527: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016505912292835324, 'time_algorithm_update': 0.004022175489470016, 'loss': 0.12149354151374292, 'time_step': 0.0042609530825947605, 'init_value': -8.084383964538574, 'ave_value': -9.918493873847497, 'soft_opc': nan} step=4816




2022-04-20 17:25.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.52 [info     ] FQE_20220420172527: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016257166862487793, 'time_algorithm_update': 0.004434952209162158, 'loss': 0.1322486071121814, 'time_step': 0.004672069189160369, 'init_value': -8.06432056427002, 'ave_value': -10.223156067326261, 'soft_opc': nan} step=5160




2022-04-20 17:25.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.54 [info     ] FQE_20220420172527: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016068857769633449, 'time_algorithm_update': 0.0041552899881850845, 'loss': 0.14495232662857446, 'time_step': 0.004389512677525365, 'init_value': -8.528160095214844, 'ave_value': -11.05201258933222, 'soft_opc': nan} step=5504




2022-04-20 17:25.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.55 [info     ] FQE_20220420172527: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016188136366910712, 'time_algorithm_update': 0.004046110912810924, 'loss': 0.15825616622473612, 'time_step': 0.004282143226889677, 'init_value': -8.124731063842773, 'ave_value': -10.99818174124838, 'soft_opc': nan} step=5848




2022-04-20 17:25.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.57 [info     ] FQE_20220420172527: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016546665236007337, 'time_algorithm_update': 0.0044994922571404035, 'loss': 0.16615187945819004, 'time_step': 0.004740477994430897, 'init_value': -8.442155838012695, 'ave_value': -11.580225620720837, 'soft_opc': nan} step=6192




2022-04-20 17:25.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:25.59 [info     ] FQE_20220420172527: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016083273776741915, 'time_algorithm_update': 0.004075218078701995, 'loss': 0.18153346000174278, 'time_step': 0.00430742807166521, 'init_value': -8.719820976257324, 'ave_value': -12.10562960161819, 'soft_opc': nan} step=6536




2022-04-20 17:25.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.00 [info     ] FQE_20220420172527: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016384693079216537, 'time_algorithm_update': 0.004450473674508028, 'loss': 0.19294878608157295, 'time_step': 0.0046865489593772005, 'init_value': -8.897254943847656, 'ave_value': -12.440771707462835, 'soft_opc': nan} step=6880




2022-04-20 17:26.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.02 [info     ] FQE_20220420172527: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016215166380239088, 'time_algorithm_update': 0.004031120344649914, 'loss': 0.20654121665608918, 'time_step': 0.004265571056410324, 'init_value': -9.272649765014648, 'ave_value': -13.054629716814102, 'soft_opc': nan} step=7224




2022-04-20 17:26.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.04 [info     ] FQE_20220420172527: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016037530677263126, 'time_algorithm_update': 0.0043249754018561785, 'loss': 0.21192677297932638, 'time_step': 0.004557843125143716, 'init_value': -9.141197204589844, 'ave_value': -13.181452884269996, 'soft_opc': nan} step=7568




2022-04-20 17:26.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.05 [info     ] FQE_20220420172527: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016076550927273062, 'time_algorithm_update': 0.004267449295798013, 'loss': 0.22244199700003794, 'time_step': 0.004501944364503373, 'init_value': -9.029403686523438, 'ave_value': -13.26606359861724, 'soft_opc': nan} step=7912




2022-04-20 17:26.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.07 [info     ] FQE_20220420172527: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001638822777326717, 'time_algorithm_update': 0.004085416017576705, 'loss': 0.2344779177503877, 'time_step': 0.004323790239733319, 'init_value': -9.266603469848633, 'ave_value': -13.6221051806326, 'soft_opc': nan} step=8256




2022-04-20 17:26.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.09 [info     ] FQE_20220420172527: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016319751739501953, 'time_algorithm_update': 0.004491718702538069, 'loss': 0.2437661392993272, 'time_step': 0.004729572423668795, 'init_value': -9.475301742553711, 'ave_value': -13.863053480015758, 'soft_opc': nan} step=8600




2022-04-20 17:26.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.10 [info     ] FQE_20220420172527: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001630845458008522, 'time_algorithm_update': 0.00401143001955609, 'loss': 0.2542110901639992, 'time_step': 0.004245096167852712, 'init_value': -9.538484573364258, 'ave_value': -14.004799472811623, 'soft_opc': nan} step=8944




2022-04-20 17:26.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.12 [info     ] FQE_20220420172527: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016245107318079747, 'time_algorithm_update': 0.004501633172811464, 'loss': 0.26465213779078495, 'time_step': 0.004738538071166637, 'init_value': -9.648378372192383, 'ave_value': -14.175345294025878, 'soft_opc': nan} step=9288




2022-04-20 17:26.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.14 [info     ] FQE_20220420172527: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016222305076066837, 'time_algorithm_update': 0.004131459219511165, 'loss': 0.2763756293922576, 'time_step': 0.00436711449955785, 'init_value': -9.979202270507812, 'ave_value': -14.436201982155467, 'soft_opc': nan} step=9632




2022-04-20 17:26.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.15 [info     ] FQE_20220420172527: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001618300759515097, 'time_algorithm_update': 0.004185142905213112, 'loss': 0.2939417599213071, 'time_step': 0.004418844400450241, 'init_value': -10.340232849121094, 'ave_value': -14.811984000231877, 'soft_opc': nan} step=9976




2022-04-20 17:26.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.17 [info     ] FQE_20220420172527: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001638684161873751, 'time_algorithm_update': 0.00441401018652805, 'loss': 0.29209053661500994, 'time_step': 0.004648492086765378, 'init_value': -9.832859992980957, 'ave_value': -14.368847877857368, 'soft_opc': nan} step=10320




2022-04-20 17:26.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.19 [info     ] FQE_20220420172527: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.000164321688718574, 'time_algorithm_update': 0.004048762626426164, 'loss': 0.3104858784665635, 'time_step': 0.004286941400794096, 'init_value': -10.59630012512207, 'ave_value': -14.98692103462663, 'soft_opc': nan} step=10664




2022-04-20 17:26.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.20 [info     ] FQE_20220420172527: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016374920689782432, 'time_algorithm_update': 0.004473839388337246, 'loss': 0.3275893250112097, 'time_step': 0.004711831724920938, 'init_value': -10.916360855102539, 'ave_value': -15.341903560860125, 'soft_opc': nan} step=11008




2022-04-20 17:26.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.22 [info     ] FQE_20220420172527: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015980421110641126, 'time_algorithm_update': 0.00400392399277798, 'loss': 0.34634188160321916, 'time_step': 0.004237151423165965, 'init_value': -11.346872329711914, 'ave_value': -15.621777188537541, 'soft_opc': nan} step=11352




2022-04-20 17:26.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.24 [info     ] FQE_20220420172527: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016411515169365462, 'time_algorithm_update': 0.004306898560634879, 'loss': 0.35810892077617695, 'time_step': 0.004543257314105367, 'init_value': -11.773447036743164, 'ave_value': -16.040816993000792, 'soft_opc': nan} step=11696




2022-04-20 17:26.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.25 [info     ] FQE_20220420172527: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016182175902433173, 'time_algorithm_update': 0.004219417655190756, 'loss': 0.3781374285084217, 'time_step': 0.0044576816780622615, 'init_value': -12.090524673461914, 'ave_value': -16.165518538297682, 'soft_opc': nan} step=12040




2022-04-20 17:26.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.27 [info     ] FQE_20220420172527: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016364524530809978, 'time_algorithm_update': 0.0040603481059850646, 'loss': 0.39132880895441874, 'time_step': 0.004298658565033314, 'init_value': -12.2645263671875, 'ave_value': -16.202857031596004, 'soft_opc': nan} step=12384




2022-04-20 17:26.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.29 [info     ] FQE_20220420172527: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016321484432664027, 'time_algorithm_update': 0.004415570996528448, 'loss': 0.4031989401704523, 'time_step': 0.0046542113603547566, 'init_value': -12.44497299194336, 'ave_value': -16.307934258538214, 'soft_opc': nan} step=12728




2022-04-20 17:26.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.30 [info     ] FQE_20220420172527: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016218146612477857, 'time_algorithm_update': 0.004065227369929469, 'loss': 0.41633869473670804, 'time_step': 0.004305491613787274, 'init_value': -12.883997917175293, 'ave_value': -16.58923423695315, 'soft_opc': nan} step=13072




2022-04-20 17:26.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.32 [info     ] FQE_20220420172527: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001652718976486561, 'time_algorithm_update': 0.004462691240532454, 'loss': 0.42118087659459996, 'time_step': 0.0047022714171298715, 'init_value': -12.998636245727539, 'ave_value': -16.587357911447416, 'soft_opc': nan} step=13416




2022-04-20 17:26.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.34 [info     ] FQE_20220420172527: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001631025658097378, 'time_algorithm_update': 0.003957855840061986, 'loss': 0.42302898149179335, 'time_step': 0.004197503938231357, 'init_value': -13.120193481445312, 'ave_value': -16.86764631533706, 'soft_opc': nan} step=13760




2022-04-20 17:26.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.35 [info     ] FQE_20220420172527: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016154799350472383, 'time_algorithm_update': 0.004239195308019948, 'loss': 0.4366879514472689, 'time_step': 0.004476665757423223, 'init_value': -13.187554359436035, 'ave_value': -16.885271439702368, 'soft_opc': nan} step=14104




2022-04-20 17:26.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.37 [info     ] FQE_20220420172527: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001641359440115995, 'time_algorithm_update': 0.004370455131974331, 'loss': 0.45348524640119353, 'time_step': 0.00460995075314544, 'init_value': -13.612205505371094, 'ave_value': -17.103456587590244, 'soft_opc': nan} step=14448




2022-04-20 17:26.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.38 [info     ] FQE_20220420172527: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001591887584952421, 'time_algorithm_update': 0.003626259953476662, 'loss': 0.4662054947436636, 'time_step': 0.003860215114992718, 'init_value': -13.381345748901367, 'ave_value': -16.842696225966392, 'soft_opc': nan} step=14792




2022-04-20 17:26.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.40 [info     ] FQE_20220420172527: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016005233276722043, 'time_algorithm_update': 0.003418817769649417, 'loss': 0.4731571861369492, 'time_step': 0.003653838883998782, 'init_value': -13.398639678955078, 'ave_value': -16.967054363301543, 'soft_opc': nan} step=15136




2022-04-20 17:26.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.41 [info     ] FQE_20220420172527: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016127838644870493, 'time_algorithm_update': 0.0034544571887615114, 'loss': 0.48208579615971375, 'time_step': 0.0036859110344287962, 'init_value': -13.933154106140137, 'ave_value': -17.30618553433299, 'soft_opc': nan} step=15480




2022-04-20 17:26.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.42 [info     ] FQE_20220420172527: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015869944594627204, 'time_algorithm_update': 0.003434373888858529, 'loss': 0.49718241227877347, 'time_step': 0.003666001003842021, 'init_value': -14.283270835876465, 'ave_value': -17.4223312459949, 'soft_opc': nan} step=15824




2022-04-20 17:26.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.44 [info     ] FQE_20220420172527: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015644902406736861, 'time_algorithm_update': 0.0033348750236422515, 'loss': 0.5097320609790987, 'time_step': 0.003563849731933239, 'init_value': -14.309146881103516, 'ave_value': -17.41140850552018, 'soft_opc': nan} step=16168




2022-04-20 17:26.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.45 [info     ] FQE_20220420172527: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016085976778074752, 'time_algorithm_update': 0.0033380306044290234, 'loss': 0.5252546344201492, 'time_step': 0.0035722338876058887, 'init_value': -14.292322158813477, 'ave_value': -17.378782795863813, 'soft_opc': nan} step=16512




2022-04-20 17:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.46 [info     ] FQE_20220420172527: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016047580297603162, 'time_algorithm_update': 0.0034293414548385976, 'loss': 0.5326080033880507, 'time_step': 0.0036647798017013906, 'init_value': -14.411334991455078, 'ave_value': -17.61736651049541, 'soft_opc': nan} step=16856




2022-04-20 17:26.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:26.48 [info     ] FQE_20220420172527: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001598471818968307, 'time_algorithm_update': 0.0034138262271881104, 'loss': 0.547598504883692, 'time_step': 0.0036463633526203246, 'init_value': -14.577688217163086, 'ave_value': -17.593868806233907, 'soft_opc': nan} step=17200




2022-04-20 17:26.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420172527/model_17200.pt
search iteration:  13
using hyper params:  [0.0002354003371688903, 0.0029142466382962567, 4.616753059423936e-05, 3]
2022-04-20 17:26.48 [debug    ] RoundIterator is selected.
2022-04-20 17:26.48 [info     ] Directory is created at d3rlpy_logs/CQL_20220420172648
2022-04-20 17:26.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:26.48 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:26.48 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420172648/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0002354003371688903, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:26.56 [info     ] CQL_20220420172648: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.000333890580294425, 'time_algorithm_update': 0.023550824115150852, 'temp_loss': 4.714093261294895, 'temp': 0.9922696318891313, 'alpha_loss': -15.820486146804186, 'alpha': 1.0168467334836546, 'critic_loss': 32.49837149792945, 'actor_loss': 1.1662418648834771, 'time_step': 0.023982862980045073, 'td_error': 2.6452313608654205, 'init_value': -5.1078619956970215, 'ave_value': -2.870979349446055} step=342
2022-04-20 17:26.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.05 [info     ] CQL_20220420172648: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033460653316207795, 'time_algorithm_update': 0.023777931754352055, 'temp_loss': 4.4623254335414595, 'temp': 0.9771174778018081, 'alpha_loss': -9.238874381048637, 'alpha': 1.044803752188097, 'critic_loss': 21.084403283414783, 'actor_loss': 4.500571113929414, 'time_step': 0.024208748549745793, 'td_error': 10.165339653940288, 'init_value': -13.267962455749512, 'ave_value': -6.8415002422611995} step=684
2022-04-20 17:27.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.13 [info     ] CQL_20220420172648: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0002987782160441081, 'time_algorithm_update': 0.021786217801055018, 'temp_loss': 3.982696885951081, 'temp': 0.9627905641382898, 'alpha_loss': -5.7384136120478315, 'alpha': 1.0660993069933171, 'critic_loss': 29.423196625291254, 'actor_loss': 9.1458901146002, 'time_step': 0.022171472248278166, 'td_error': 7.724891858215249, 'init_value': -19.28968048095703, 'ave_value': -10.356208636238835} step=1026
2022-04-20 17:27.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.22 [info     ] CQL_20220420172648: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003341094792237756, 'time_algorithm_update': 0.02400553365897017, 'temp_loss': 3.461249707735073, 'temp': 0.9497502360776154, 'alpha_loss': -3.2846126075376545, 'alpha': 1.0826424527586551, 'critic_loss': 43.7603848663687, 'actor_loss': 13.443753822505126, 'time_step': 0.024436554713555943, 'td_error': 8.544978003388028, 'init_value': -24.138694763183594, 'ave_value': -12.794763518212749} step=1368
2022-04-20 17:27.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.30 [info     ] CQL_20220420172648: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00033413109026457136, 'time_algorithm_update': 0.023574232357984396, 'temp_loss': 3.043325357966953, 'temp': 0.9375785611874876, 'alpha_loss': -1.1727683185177653, 'alpha': 1.0929272840594688, 'critic_loss': 62.0639145165159, 'actor_loss': 17.481415999563115, 'time_step': 0.02400657238318906, 'td_error': 10.674017419668251, 'init_value': -30.325729370117188, 'ave_value': -16.425164740871605} step=1710
2022-04-20 17:27.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.39 [info     ] CQL_20220420172648: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00033259600923772446, 'time_algorithm_update': 0.023880635785777666, 'temp_loss': 2.6912154544863784, 'temp': 0.9261249101649948, 'alpha_loss': 0.6332470250175721, 'alpha': 1.09425079264836, 'critic_loss': 83.19058788589567, 'actor_loss': 21.326579389516372, 'time_step': 0.02431325466312163, 'td_error': 13.307482224974532, 'init_value': -36.448387145996094, 'ave_value': -19.53685678061534} step=2052
2022-04-20 17:27.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.47 [info     ] CQL_20220420172648: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00031116413094146904, 'time_algorithm_update': 0.022225775216755114, 'temp_loss': 2.3889459897900185, 'temp': 0.9152161757848416, 'alpha_loss': 2.195996693362705, 'alpha': 1.0839698335580659, 'critic_loss': 106.51148366649248, 'actor_loss': 25.027435665242155, 'time_step': 0.022630345054537232, 'td_error': 15.297550928829184, 'init_value': -41.49518966674805, 'ave_value': -22.00587588616588} step=2394
2022-04-20 17:27.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:27.56 [info     ] CQL_20220420172648: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003286153949492159, 'time_algorithm_update': 0.023865439738446508, 'temp_loss': 2.148964287941916, 'temp': 0.9047198673786475, 'alpha_loss': 3.4133015872790797, 'alpha': 1.0620226302342108, 'critic_loss': 131.05764650043687, 'actor_loss': 28.717617241262694, 'time_step': 0.02428810289728711, 'td_error': 17.72186132239814, 'init_value': -47.03693771362305, 'ave_value': -25.36203310899302} step=2736
2022-04-20 17:27.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.05 [info     ] CQL_20220420172648: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00033065450121784766, 'time_algorithm_update': 0.024010694514938265, 'temp_loss': 1.9336493611335754, 'temp': 0.8944956182736402, 'alpha_loss': 4.382745147971382, 'alpha': 1.0302148220831888, 'critic_loss': 156.77548875864485, 'actor_loss': 32.27383809898332, 'time_step': 0.02444126592044942, 'td_error': 21.238981542437557, 'init_value': -52.22600555419922, 'ave_value': -27.608569723365004} step=3078
2022-04-20 17:28.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.13 [info     ] CQL_20220420172648: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00033270824722379273, 'time_algorithm_update': 0.02384538817823979, 'temp_loss': 1.7431748729003103, 'temp': 0.8845175396629245, 'alpha_loss': 5.1955109796328856, 'alpha': 0.9932725356336225, 'critic_loss': 182.7948972913954, 'actor_loss': 35.712548601697065, 'time_step': 0.024277683587102163, 'td_error': 24.91504352838273, 'init_value': -58.544456481933594, 'ave_value': -30.274773988533127} step=3420
2022-04-20 17:28.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.22 [info     ] CQL_20220420172648: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003318800563700715, 'time_algorithm_update': 0.023895783033984448, 'temp_loss': 1.5584308114665293, 'temp': 0.8748935520300392, 'alpha_loss': 5.8847332223814135, 'alpha': 0.9539364928390548, 'critic_loss': 210.48111733776784, 'actor_loss': 39.168136730528715, 'time_step': 0.024326635383025944, 'td_error': 27.826155891942026, 'init_value': -64.92127990722656, 'ave_value': -34.26094752135846} step=3762
2022-04-20 17:28.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.31 [info     ] CQL_20220420172648: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003318270744636045, 'time_algorithm_update': 0.02397418301007901, 'temp_loss': 1.3887520270389424, 'temp': 0.8655147972511269, 'alpha_loss': 6.420674426513806, 'alpha': 0.9157942726946714, 'critic_loss': 239.30122263947425, 'actor_loss': 42.46922990453174, 'time_step': 0.024402351407279744, 'td_error': 31.966394664376875, 'init_value': -70.62113952636719, 'ave_value': -35.98731417284758} step=4104
2022-04-20 17:28.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.39 [info     ] CQL_20220420172648: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003318159203780325, 'time_algorithm_update': 0.023805633623000474, 'temp_loss': 1.2580793266407928, 'temp': 0.8564314321119185, 'alpha_loss': 6.88241197839815, 'alpha': 0.8792880302981326, 'critic_loss': 270.15177426700706, 'actor_loss': 45.82947040580169, 'time_step': 0.02423821555243598, 'td_error': 36.130157050485806, 'init_value': -76.70222473144531, 'ave_value': -39.477099015969145} step=4446
2022-04-20 17:28.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.48 [info     ] CQL_20220420172648: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003333607612297549, 'time_algorithm_update': 0.02401069660632931, 'temp_loss': 1.1094698069388407, 'temp': 0.8475335144508652, 'alpha_loss': 7.300626968779759, 'alpha': 0.8446207011652271, 'critic_loss': 303.58338544522115, 'actor_loss': 49.18702179088927, 'time_step': 0.024441749728911104, 'td_error': 39.932523792479365, 'init_value': -82.3429946899414, 'ave_value': -42.49597582335274} step=4788
2022-04-20 17:28.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:28.57 [info     ] CQL_20220420172648: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00033528553812127367, 'time_algorithm_update': 0.02402312365191722, 'temp_loss': 0.9637321922165608, 'temp': 0.8390070259222511, 'alpha_loss': 7.588039537619429, 'alpha': 0.8119468371770535, 'critic_loss': 338.81504937779835, 'actor_loss': 52.520696907712704, 'time_step': 0.02445629395936665, 'td_error': 43.74756448333935, 'init_value': -88.8360595703125, 'ave_value': -45.2521190529216} step=5130
2022-04-20 17:28.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.06 [info     ] CQL_20220420172648: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003316660373531587, 'time_algorithm_update': 0.024066155416923658, 'temp_loss': 0.8579615914420775, 'temp': 0.8308184411790636, 'alpha_loss': 7.888744888249894, 'alpha': 0.7812393938588817, 'critic_loss': 375.80044359352155, 'actor_loss': 55.79157717743812, 'time_step': 0.024498075769658675, 'td_error': 46.7381722391723, 'init_value': -95.35957336425781, 'ave_value': -48.60109556466475} step=5472
2022-04-20 17:29.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.15 [info     ] CQL_20220420172648: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003339205568993998, 'time_algorithm_update': 0.02505167185911658, 'temp_loss': 0.7407827284949565, 'temp': 0.8229790882408967, 'alpha_loss': 8.187177156147204, 'alpha': 0.7519956356600711, 'critic_loss': 414.1954456351654, 'actor_loss': 59.06300590470521, 'time_step': 0.025484778030573974, 'td_error': 56.63286977175749, 'init_value': -102.48893737792969, 'ave_value': -51.814513273715704} step=5814
2022-04-20 17:29.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.24 [info     ] CQL_20220420172648: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003335956941571152, 'time_algorithm_update': 0.02546041750768472, 'temp_loss': 0.6340203645764387, 'temp': 0.8154364512329213, 'alpha_loss': 8.51458165798968, 'alpha': 0.7239320555277038, 'critic_loss': 453.9121423910933, 'actor_loss': 62.412980486775005, 'time_step': 0.02589508943390428, 'td_error': 61.161018249006275, 'init_value': -110.30448913574219, 'ave_value': -55.7534911552085} step=6156
2022-04-20 17:29.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.33 [info     ] CQL_20220420172648: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00033139485364768937, 'time_algorithm_update': 0.025092786515665332, 'temp_loss': 0.5062565489492395, 'temp': 0.8085198055582437, 'alpha_loss': 8.771360660854139, 'alpha': 0.6973276863321226, 'critic_loss': 495.75163857978686, 'actor_loss': 65.67790622599641, 'time_step': 0.025521959477697895, 'td_error': 66.77426279323444, 'init_value': -116.61589050292969, 'ave_value': -58.98481836293866} step=6498
2022-04-20 17:29.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.43 [info     ] CQL_20220420172648: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034144956466050175, 'time_algorithm_update': 0.025538007418314617, 'temp_loss': 0.4195080082427863, 'temp': 0.8022840814283717, 'alpha_loss': 8.91531413061577, 'alpha': 0.6719640911671153, 'critic_loss': 537.9828459087171, 'actor_loss': 68.96353401496397, 'time_step': 0.025975194590830662, 'td_error': 75.25250264147259, 'init_value': -121.8726806640625, 'ave_value': -60.3038952806592} step=6840
2022-04-20 17:29.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:29.52 [info     ] CQL_20220420172648: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003365501325729995, 'time_algorithm_update': 0.025243071784750062, 'temp_loss': 0.345260297291373, 'temp': 0.7965186168575844, 'alpha_loss': 9.069491341797232, 'alpha': 0.6477887083215323, 'critic_loss': 580.2704304477625, 'actor_loss': 72.09145734463519, 'time_step': 0.02567845757244623, 'td_error': 79.80588079385556, 'init_value': -127.26167297363281, 'ave_value': -63.08978893489719} step=7182
2022-04-20 17:29.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.01 [info     ] CQL_20220420172648: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003304948583680984, 'time_algorithm_update': 0.025502964069968777, 'temp_loss': 0.27533798149040745, 'temp': 0.7912852980240047, 'alpha_loss': 9.089070396813733, 'alpha': 0.6248724783024593, 'critic_loss': 623.1155959458379, 'actor_loss': 75.20145223294085, 'time_step': 0.025932935246250087, 'td_error': 98.9499023824933, 'init_value': -135.95553588867188, 'ave_value': -68.06281679847487} step=7524
2022-04-20 17:30.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.10 [info     ] CQL_20220420172648: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003342956130267584, 'time_algorithm_update': 0.025150382727907414, 'temp_loss': 0.2189530945922199, 'temp': 0.7865154919219993, 'alpha_loss': 9.148609072144268, 'alpha': 0.6030908436105963, 'critic_loss': 667.6620642232616, 'actor_loss': 78.41946973298725, 'time_step': 0.025583910663225497, 'td_error': 105.59157547511307, 'init_value': -140.5065155029297, 'ave_value': -69.78563549872588} step=7866
2022-04-20 17:30.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.20 [info     ] CQL_20220420172648: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00033386269508049504, 'time_algorithm_update': 0.025467894230669703, 'temp_loss': 0.17694625809614423, 'temp': 0.782023210623111, 'alpha_loss': 9.162511686135455, 'alpha': 0.5824023748350422, 'critic_loss': 710.7285161603961, 'actor_loss': 81.37871642977174, 'time_step': 0.025901809073331065, 'td_error': 110.50566323051663, 'init_value': -148.3322296142578, 'ave_value': -73.65885351578528} step=8208
2022-04-20 17:30.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.29 [info     ] CQL_20220420172648: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003361102433232536, 'time_algorithm_update': 0.02503800461863914, 'temp_loss': 0.1282505941201459, 'temp': 0.778290005106675, 'alpha_loss': 9.1128719982348, 'alpha': 0.562467220233895, 'critic_loss': 750.4373001187865, 'actor_loss': 84.09626216776887, 'time_step': 0.0254711031216627, 'td_error': 116.49561334534464, 'init_value': -152.2831268310547, 'ave_value': -74.77398222697076} step=8550
2022-04-20 17:30.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.38 [info     ] CQL_20220420172648: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00033592480665061905, 'time_algorithm_update': 0.0254572183765166, 'temp_loss': 0.07620232149806114, 'temp': 0.7753528804458373, 'alpha_loss': 9.033618839163529, 'alpha': 0.5436269022219362, 'critic_loss': 789.6634173476905, 'actor_loss': 86.82499132658306, 'time_step': 0.02589034894753618, 'td_error': 121.28855971643132, 'init_value': -156.67538452148438, 'ave_value': -76.28301567652875} step=8892
2022-04-20 17:30.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.47 [info     ] CQL_20220420172648: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00033775477381477577, 'time_algorithm_update': 0.025254540973239474, 'temp_loss': 0.05879874924491895, 'temp': 0.773408536848269, 'alpha_loss': 8.834554237231874, 'alpha': 0.5255768731323599, 'critic_loss': 828.5368734437819, 'actor_loss': 89.61731684277629, 'time_step': 0.02569129453067891, 'td_error': 119.29223143577951, 'init_value': -164.31674194335938, 'ave_value': -80.53209816664055} step=9234
2022-04-20 17:30.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:30.57 [info     ] CQL_20220420172648: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00033352319260089716, 'time_algorithm_update': 0.025644344195984957, 'temp_loss': 0.038268901385933335, 'temp': 0.7715209982199975, 'alpha_loss': 8.564480598907025, 'alpha': 0.5085581422549242, 'critic_loss': 866.0978015208104, 'actor_loss': 92.03085351687426, 'time_step': 0.026075821870948836, 'td_error': 135.63355702825046, 'init_value': -171.10157775878906, 'ave_value': -83.29614579387882} step=9576
2022-04-20 17:30.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.06 [info     ] CQL_20220420172648: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003371224765889129, 'time_algorithm_update': 0.025432859247888042, 'temp_loss': 0.021834244494114006, 'temp': 0.7709118730840627, 'alpha_loss': 8.565008234559444, 'alpha': 0.49212274425908137, 'critic_loss': 898.1469257198579, 'actor_loss': 94.21528737029136, 'time_step': 0.025867510957327502, 'td_error': 138.05178931106164, 'init_value': -174.29745483398438, 'ave_value': -84.6776577708152} step=9918
2022-04-20 17:31.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.15 [info     ] CQL_20220420172648: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00033558112138893173, 'time_algorithm_update': 0.02531259798864175, 'temp_loss': -0.010007729124800678, 'temp': 0.7705778563231752, 'alpha_loss': 8.445712104875442, 'alpha': 0.4762447490329631, 'critic_loss': 929.2600095871596, 'actor_loss': 96.40122287594087, 'time_step': 0.025746116861265305, 'td_error': 163.29093119847667, 'init_value': -176.18289184570312, 'ave_value': -84.66810761497767} step=10260
2022-04-20 17:31.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.24 [info     ] CQL_20220420172648: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00033752681219089797, 'time_algorithm_update': 0.025495925144842495, 'temp_loss': -0.02857190968022186, 'temp': 0.7707939215919428, 'alpha_loss': 8.234174000589471, 'alpha': 0.4610653342733606, 'critic_loss': 959.9022247135988, 'actor_loss': 98.50978182073226, 'time_step': 0.025931553533899854, 'td_error': 153.79057716351912, 'init_value': -182.30313110351562, 'ave_value': -87.09997200507703} step=10602
2022-04-20 17:31.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.34 [info     ] CQL_20220420172648: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00033102258604172376, 'time_algorithm_update': 0.025387547866642823, 'temp_loss': -0.01964502818664612, 'temp': 0.7724203219887806, 'alpha_loss': 8.024288063160856, 'alpha': 0.4464022427448752, 'critic_loss': 990.1042998018321, 'actor_loss': 100.65915717855532, 'time_step': 0.025815473662482366, 'td_error': 179.50936690232808, 'init_value': -187.92703247070312, 'ave_value': -90.66738529919505} step=10944
2022-04-20 17:31.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.43 [info     ] CQL_20220420172648: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00033775965372721356, 'time_algorithm_update': 0.025548382809287624, 'temp_loss': -0.05093113891235743, 'temp': 0.7737898516376116, 'alpha_loss': 7.872527549141331, 'alpha': 0.43235094644869976, 'critic_loss': 1018.1384084601151, 'actor_loss': 102.54935671711526, 'time_step': 0.02598357061196489, 'td_error': 181.68227297899116, 'init_value': -190.51663208007812, 'ave_value': -92.18520359347264} step=11286
2022-04-20 17:31.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:31.52 [info     ] CQL_20220420172648: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000338591330232676, 'time_algorithm_update': 0.025310113416080585, 'temp_loss': -0.04533992852057107, 'temp': 0.7765691794498623, 'alpha_loss': 7.712627879360266, 'alpha': 0.41868705864538225, 'critic_loss': 1045.0337253146702, 'actor_loss': 104.3336105123598, 'time_step': 0.025746197031255354, 'td_error': 182.41492137156942, 'init_value': -195.26223754882812, 'ave_value': -93.82747922421844} step=11628
2022-04-20 17:31.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.02 [info     ] CQL_20220420172648: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003336688928436815, 'time_algorithm_update': 0.02566944228278266, 'temp_loss': -0.03864590965254962, 'temp': 0.7788212933735541, 'alpha_loss': 7.413036437062492, 'alpha': 0.40566287725641015, 'critic_loss': 1069.513416134126, 'actor_loss': 105.91082578514055, 'time_step': 0.02610247316416244, 'td_error': 180.7235979770053, 'init_value': -198.80789184570312, 'ave_value': -95.83232873942939} step=11970
2022-04-20 17:32.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.10 [info     ] CQL_20220420172648: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00033453403160585996, 'time_algorithm_update': 0.024245198010004056, 'temp_loss': -0.0655454204728206, 'temp': 0.7810903271736457, 'alpha_loss': 7.198279483973631, 'alpha': 0.39320265950515254, 'critic_loss': 1094.5390889128746, 'actor_loss': 107.67477680228608, 'time_step': 0.024678419905099256, 'td_error': 193.7493809576961, 'init_value': -202.47207641601562, 'ave_value': -97.03405766891736} step=12312
2022-04-20 17:32.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.19 [info     ] CQL_20220420172648: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00033545982070833617, 'time_algorithm_update': 0.02391446194453546, 'temp_loss': -0.060132724924655684, 'temp': 0.7844580641964025, 'alpha_loss': 6.984678370213648, 'alpha': 0.38114811567186613, 'critic_loss': 1114.3744965491935, 'actor_loss': 108.98910252532066, 'time_step': 0.024349745254070437, 'td_error': 201.24975267099293, 'init_value': -205.5703125, 'ave_value': -99.02510823006028} step=12654
2022-04-20 17:32.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.28 [info     ] CQL_20220420172648: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003359987024675336, 'time_algorithm_update': 0.023865184588739048, 'temp_loss': -0.041183407137943814, 'temp': 0.7878409285991512, 'alpha_loss': 6.822419391040913, 'alpha': 0.36944633068745597, 'critic_loss': 1135.0841200086807, 'actor_loss': 110.44377854553579, 'time_step': 0.02429568976686712, 'td_error': 203.15020741887852, 'init_value': -208.94076538085938, 'ave_value': -100.19562226250768} step=12996
2022-04-20 17:32.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.37 [info     ] CQL_20220420172648: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003333314817551284, 'time_algorithm_update': 0.02396717475868805, 'temp_loss': -0.07655239028159028, 'temp': 0.790970133759125, 'alpha_loss': 6.5786487414822945, 'alpha': 0.35816664096207645, 'critic_loss': 1152.681631523266, 'actor_loss': 111.57251264337907, 'time_step': 0.024400579301934493, 'td_error': 217.02628079104286, 'init_value': -213.8144989013672, 'ave_value': -102.04623782104804} step=13338
2022-04-20 17:32.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.45 [info     ] CQL_20220420172648: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00033303798987851504, 'time_algorithm_update': 0.024028610764888294, 'temp_loss': -0.034064105227153904, 'temp': 0.7940822675911307, 'alpha_loss': 6.288296025398879, 'alpha': 0.3473858396735108, 'critic_loss': 1169.687684711657, 'actor_loss': 112.83907469810798, 'time_step': 0.024459833290144714, 'td_error': 215.0014279949438, 'init_value': -215.9392547607422, 'ave_value': -102.72065352627017} step=13680
2022-04-20 17:32.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:32.54 [info     ] CQL_20220420172648: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00033307145213523105, 'time_algorithm_update': 0.023835338347139415, 'temp_loss': -0.06288927102363423, 'temp': 0.7962405132619959, 'alpha_loss': 6.150529347665128, 'alpha': 0.3369268017728426, 'critic_loss': 1187.186441700361, 'actor_loss': 113.86027497854846, 'time_step': 0.024266970785040604, 'td_error': 198.6741724023785, 'init_value': -217.0885009765625, 'ave_value': -102.91144210866443} step=14022
2022-04-20 17:32.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.03 [info     ] CQL_20220420172648: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.000339533153333162, 'time_algorithm_update': 0.0237475954301176, 'temp_loss': -0.01912517590197729, 'temp': 0.7985406010820154, 'alpha_loss': 5.947966446653444, 'alpha': 0.3267504445642059, 'critic_loss': 1201.1041700575088, 'actor_loss': 114.79848031830369, 'time_step': 0.024182561545344126, 'td_error': 185.1434750382246, 'init_value': -217.7853546142578, 'ave_value': -103.67350702957259} step=14364
2022-04-20 17:33.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.11 [info     ] CQL_20220420172648: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00033557484721579746, 'time_algorithm_update': 0.024013307359483507, 'temp_loss': -0.01500061776336522, 'temp': 0.7994503399782014, 'alpha_loss': 5.5740579673421315, 'alpha': 0.31705241384561994, 'critic_loss': 1216.4850861844961, 'actor_loss': 115.73719803213376, 'time_step': 0.024448586486236393, 'td_error': 190.4466815566102, 'init_value': -221.3545379638672, 'ave_value': -105.3795327458336} step=14706
2022-04-20 17:33.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.20 [info     ] CQL_20220420172648: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003344636214406867, 'time_algorithm_update': 0.024051188725477072, 'temp_loss': 0.006245914971924316, 'temp': 0.7996094490003864, 'alpha_loss': 5.286627123230382, 'alpha': 0.307838766721257, 'critic_loss': 1229.7186798631099, 'actor_loss': 116.49683525130065, 'time_step': 0.024483271509583234, 'td_error': 215.14230798552518, 'init_value': -224.33560180664062, 'ave_value': -107.88076547984201} step=15048
2022-04-20 17:33.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.29 [info     ] CQL_20220420172648: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033088594849346677, 'time_algorithm_update': 0.0240218750914635, 'temp_loss': -0.0005982034888706709, 'temp': 0.7998578316635556, 'alpha_loss': 5.103543976594133, 'alpha': 0.2990050092775222, 'critic_loss': 1237.6219555592677, 'actor_loss': 117.05702280858804, 'time_step': 0.024451139377571686, 'td_error': 180.4964212136246, 'init_value': -227.04541015625, 'ave_value': -107.29914834071789} step=15390
2022-04-20 17:33.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.38 [info     ] CQL_20220420172648: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003314067048636096, 'time_algorithm_update': 0.02403373537007828, 'temp_loss': 0.01870548210449909, 'temp': 0.7985574320742959, 'alpha_loss': 4.802237799293117, 'alpha': 0.2903497617321405, 'critic_loss': 1246.4667517232615, 'actor_loss': 117.51240822864555, 'time_step': 0.02446045652467605, 'td_error': 190.4387742884624, 'init_value': -226.2355499267578, 'ave_value': -106.40117185705752} step=15732
2022-04-20 17:33.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.46 [info     ] CQL_20220420172648: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00033302125875015707, 'time_algorithm_update': 0.0240127712662457, 'temp_loss': 0.023493579806683706, 'temp': 0.7974057603649228, 'alpha_loss': 4.601255308117783, 'alpha': 0.28208243123620574, 'critic_loss': 1251.413981298257, 'actor_loss': 117.9086402313054, 'time_step': 0.02444376443561755, 'td_error': 171.9582011352656, 'init_value': -226.43923950195312, 'ave_value': -106.73125886470079} step=16074
2022-04-20 17:33.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:33.55 [info     ] CQL_20220420172648: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003346169901173017, 'time_algorithm_update': 0.023872648066247417, 'temp_loss': 0.021237590600741884, 'temp': 0.7967285886842604, 'alpha_loss': 4.415604717550222, 'alpha': 0.27396143685307417, 'critic_loss': 1257.1963765105309, 'actor_loss': 118.23662752296492, 'time_step': 0.024304882127639146, 'td_error': 185.0530843481586, 'init_value': -228.84716796875, 'ave_value': -108.2095637456878} step=16416
2022-04-20 17:33.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.04 [info     ] CQL_20220420172648: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033089570831834223, 'time_algorithm_update': 0.023997762049847875, 'temp_loss': 0.024464343535599477, 'temp': 0.7958393272949241, 'alpha_loss': 4.263863160596256, 'alpha': 0.2660403602827362, 'critic_loss': 1261.6594502409996, 'actor_loss': 118.5571601376896, 'time_step': 0.024428537017420718, 'td_error': 162.24941180719367, 'init_value': -227.38925170898438, 'ave_value': -107.68699658404196} step=16758
2022-04-20 17:34.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:34.13 [info     ] CQL_20220420172648: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00033595338899489733, 'time_algorithm_update': 0.024023564238297313, 'temp_loss': 0.023014307283518606, 'temp': 0.7938944882119608, 'alpha_loss': 4.069331319011443, 'alpha': 0.2584232328928005, 'critic_loss': 1265.4242887441178, 'actor_loss': 118.74924007214997, 'time_step': 0.024459009282073084, 'td_error': 171.33658402126156, 'init_value': -230.019775390625, 'ave_value': -109.23797959193587} step=17100
2022-04-20 17:34.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420172648/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:34.13 [info     ] FQE_20220420173413: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.0001475083625922769, 'time_algorithm_update': 0.003416792821075957, 'loss': 0.007827567578540683, 'time_step': 0.003635335103266657, 'init_value': 0.007312271744012833, 'ave_value': 0.08820559220282255, 'soft_opc': nan} step=177




2022-04-20 17:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.14 [info     ] FQE_20220420173413: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015240873994126831, 'time_algorithm_update': 0.003507005292817024, 'loss': 0.006023137959109693, 'time_step': 0.0037261968278615486, 'init_value': -0.16640478372573853, 'ave_value': 0.00814169938857849, 'soft_opc': nan} step=354




2022-04-20 17:34.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.15 [info     ] FQE_20220420173413: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015240604594602422, 'time_algorithm_update': 0.003335840957986433, 'loss': 0.005380604891001252, 'time_step': 0.0035607343339650642, 'init_value': -0.30274486541748047, 'ave_value': -0.051277047302853596, 'soft_opc': nan} step=531




2022-04-20 17:34.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.16 [info     ] FQE_20220420173413: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.0001555351214220295, 'time_algorithm_update': 0.0035765992719574836, 'loss': 0.005251870388007265, 'time_step': 0.0038001779782570015, 'init_value': -0.3833412230014801, 'ave_value': -0.08899924970615107, 'soft_opc': nan} step=708




2022-04-20 17:34.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.16 [info     ] FQE_20220420173413: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00015475251580362266, 'time_algorithm_update': 0.0033934614752645547, 'loss': 0.00508089936575142, 'time_step': 0.0036203511017190533, 'init_value': -0.4710801839828491, 'ave_value': -0.11401832110385547, 'soft_opc': nan} step=885




2022-04-20 17:34.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.17 [info     ] FQE_20220420173413: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00015453834318171787, 'time_algorithm_update': 0.0035481897451109805, 'loss': 0.005021280839271044, 'time_step': 0.003771701101529396, 'init_value': -0.5651057958602905, 'ave_value': -0.15056323115196493, 'soft_opc': nan} step=1062




2022-04-20 17:34.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.18 [info     ] FQE_20220420173413: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001515884183894443, 'time_algorithm_update': 0.0034093398832331944, 'loss': 0.004860210399495932, 'time_step': 0.003629631915334928, 'init_value': -0.6556780338287354, 'ave_value': -0.18826493456534615, 'soft_opc': nan} step=1239




2022-04-20 17:34.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.18 [info     ] FQE_20220420173413: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015323579648120254, 'time_algorithm_update': 0.003523620508484921, 'loss': 0.004656322927892966, 'time_step': 0.003749683078399486, 'init_value': -0.7217424511909485, 'ave_value': -0.20134479703504715, 'soft_opc': nan} step=1416




2022-04-20 17:34.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.19 [info     ] FQE_20220420173413: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.0001522982861362608, 'time_algorithm_update': 0.003458943070664918, 'loss': 0.004485896770121519, 'time_step': 0.0036814280148953367, 'init_value': -0.7475070953369141, 'ave_value': -0.20270254599215748, 'soft_opc': nan} step=1593




2022-04-20 17:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.20 [info     ] FQE_20220420173413: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00015432955855030123, 'time_algorithm_update': 0.003473521625928286, 'loss': 0.004466691870626462, 'time_step': 0.003697422264659472, 'init_value': -0.7857548594474792, 'ave_value': -0.19743259413385983, 'soft_opc': nan} step=1770




2022-04-20 17:34.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.21 [info     ] FQE_20220420173413: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015607257347322452, 'time_algorithm_update': 0.0034138617542503915, 'loss': 0.004504812182858586, 'time_step': 0.003642844615009545, 'init_value': -0.8869307041168213, 'ave_value': -0.2650026474220751, 'soft_opc': nan} step=1947




2022-04-20 17:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.21 [info     ] FQE_20220420173413: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015623421318786967, 'time_algorithm_update': 0.003470435654376186, 'loss': 0.004549713834257281, 'time_step': 0.003700630812995178, 'init_value': -0.9862702488899231, 'ave_value': -0.3084318186416551, 'soft_opc': nan} step=2124




2022-04-20 17:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.22 [info     ] FQE_20220420173413: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015531017281914834, 'time_algorithm_update': 0.0034730622997391695, 'loss': 0.004500456899536069, 'time_step': 0.0036999600081794007, 'init_value': -1.0056272745132446, 'ave_value': -0.30629627844458585, 'soft_opc': nan} step=2301




2022-04-20 17:34.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.23 [info     ] FQE_20220420173413: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00015462589802715064, 'time_algorithm_update': 0.0034786065419514975, 'loss': 0.004716416408497272, 'time_step': 0.003703070225688697, 'init_value': -1.0797449350357056, 'ave_value': -0.3454404520351921, 'soft_opc': nan} step=2478




2022-04-20 17:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.23 [info     ] FQE_20220420173413: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015301488887118755, 'time_algorithm_update': 0.0035238292931163376, 'loss': 0.004719748528079671, 'time_step': 0.0037482593019129867, 'init_value': -1.15418541431427, 'ave_value': -0.4013896180982436, 'soft_opc': nan} step=2655




2022-04-20 17:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.24 [info     ] FQE_20220420173413: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00015058355816340043, 'time_algorithm_update': 0.003499110539754232, 'loss': 0.0050163675740224406, 'time_step': 0.003720393962105789, 'init_value': -1.1864887475967407, 'ave_value': -0.41880145237694405, 'soft_opc': nan} step=2832




2022-04-20 17:34.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.25 [info     ] FQE_20220420173413: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015290174107093595, 'time_algorithm_update': 0.0034513891080005022, 'loss': 0.00506533983644982, 'time_step': 0.0036762286040742517, 'init_value': -1.2900243997573853, 'ave_value': -0.5193073769069246, 'soft_opc': nan} step=3009




2022-04-20 17:34.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.26 [info     ] FQE_20220420173413: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.0001554004216598252, 'time_algorithm_update': 0.003460560814808991, 'loss': 0.005489492682361249, 'time_step': 0.0036857033853477, 'init_value': -1.2792176008224487, 'ave_value': -0.4774179197031188, 'soft_opc': nan} step=3186




2022-04-20 17:34.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.26 [info     ] FQE_20220420173413: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00015736434419276352, 'time_algorithm_update': 0.0034044058309436517, 'loss': 0.005720001076463884, 'time_step': 0.003631583714889268, 'init_value': -1.3410018682479858, 'ave_value': -0.5233694785942992, 'soft_opc': nan} step=3363




2022-04-20 17:34.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.27 [info     ] FQE_20220420173413: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00015407228200449107, 'time_algorithm_update': 0.003425533488645392, 'loss': 0.006006611822209832, 'time_step': 0.003650507684481346, 'init_value': -1.4055495262145996, 'ave_value': -0.5509098230399662, 'soft_opc': nan} step=3540




2022-04-20 17:34.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.28 [info     ] FQE_20220420173413: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00015427567864541953, 'time_algorithm_update': 0.0033894164414055602, 'loss': 0.006374662198747198, 'time_step': 0.003617813358199125, 'init_value': -1.504613995552063, 'ave_value': -0.6111628222942084, 'soft_opc': nan} step=3717




2022-04-20 17:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.28 [info     ] FQE_20220420173413: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001527387543586688, 'time_algorithm_update': 0.0033789273709227134, 'loss': 0.006628499243350563, 'time_step': 0.0036035755933341335, 'init_value': -1.498327612876892, 'ave_value': -0.5980609515533635, 'soft_opc': nan} step=3894




2022-04-20 17:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.29 [info     ] FQE_20220420173413: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015683362712967867, 'time_algorithm_update': 0.0044933294845839675, 'loss': 0.007137066516946307, 'time_step': 0.004724071524237509, 'init_value': -1.6039128303527832, 'ave_value': -0.6675782843971172, 'soft_opc': nan} step=4071




2022-04-20 17:34.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.30 [info     ] FQE_20220420173413: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00015439690843140337, 'time_algorithm_update': 0.00445461003793835, 'loss': 0.007635696062365663, 'time_step': 0.004680667219862426, 'init_value': -1.6489094495773315, 'ave_value': -0.7009355632079256, 'soft_opc': nan} step=4248




2022-04-20 17:34.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.31 [info     ] FQE_20220420173413: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015157764240846796, 'time_algorithm_update': 0.003654416671580514, 'loss': 0.007908816642711982, 'time_step': 0.0038775481746695134, 'init_value': -1.718515396118164, 'ave_value': -0.7251019787043333, 'soft_opc': nan} step=4425




2022-04-20 17:34.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.32 [info     ] FQE_20220420173413: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00015713804859226034, 'time_algorithm_update': 0.004472529147304384, 'loss': 0.008384725067759259, 'time_step': 0.004698723722985909, 'init_value': -1.775402545928955, 'ave_value': -0.7690089644436297, 'soft_opc': nan} step=4602




2022-04-20 17:34.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.33 [info     ] FQE_20220420173413: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00015629886907372772, 'time_algorithm_update': 0.00449388444760425, 'loss': 0.008959633927650245, 'time_step': 0.0047233158585715425, 'init_value': -1.8235276937484741, 'ave_value': -0.7994013640168193, 'soft_opc': nan} step=4779




2022-04-20 17:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.33 [info     ] FQE_20220420173413: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.0001564591617907508, 'time_algorithm_update': 0.004022168574360131, 'loss': 0.009388874504599447, 'time_step': 0.0042474795196015954, 'init_value': -1.930046558380127, 'ave_value': -0.8576879099576368, 'soft_opc': nan} step=4956




2022-04-20 17:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.34 [info     ] FQE_20220420173413: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.000154069588009247, 'time_algorithm_update': 0.004309161234710176, 'loss': 0.009818705922850317, 'time_step': 0.004533012034529347, 'init_value': -1.991261601448059, 'ave_value': -0.8847465613677918, 'soft_opc': nan} step=5133




2022-04-20 17:34.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.35 [info     ] FQE_20220420173413: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015668141639838784, 'time_algorithm_update': 0.004503519521594721, 'loss': 0.01014074045930605, 'time_step': 0.00472367685393425, 'init_value': -2.053511381149292, 'ave_value': -0.9464771500550904, 'soft_opc': nan} step=5310




2022-04-20 17:34.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.36 [info     ] FQE_20220420173413: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00015719058149951998, 'time_algorithm_update': 0.004380063148541639, 'loss': 0.010830219946196566, 'time_step': 0.0046096885271665064, 'init_value': -2.0156733989715576, 'ave_value': -0.8928350391039798, 'soft_opc': nan} step=5487




2022-04-20 17:34.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.37 [info     ] FQE_20220420173413: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00015858472403833422, 'time_algorithm_update': 0.003903046839654782, 'loss': 0.011225733408141397, 'time_step': 0.00413425494048555, 'init_value': -2.0606982707977295, 'ave_value': -0.9524927400209167, 'soft_opc': nan} step=5664




2022-04-20 17:34.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.38 [info     ] FQE_20220420173413: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00015715690655896894, 'time_algorithm_update': 0.004435991836806475, 'loss': 0.011723451956969786, 'time_step': 0.004660427233593612, 'init_value': -2.1588919162750244, 'ave_value': -1.032640849348359, 'soft_opc': nan} step=5841




2022-04-20 17:34.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.39 [info     ] FQE_20220420173413: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015761757974570753, 'time_algorithm_update': 0.00450507530384818, 'loss': 0.011978171155445994, 'time_step': 0.0047329886484954314, 'init_value': -2.1740529537200928, 'ave_value': -1.0567822388286616, 'soft_opc': nan} step=6018




2022-04-20 17:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.39 [info     ] FQE_20220420173413: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.000150349180577165, 'time_algorithm_update': 0.003584642194758701, 'loss': 0.012374134079847842, 'time_step': 0.0038031063510873225, 'init_value': -2.140265464782715, 'ave_value': -1.013157558504369, 'soft_opc': nan} step=6195




2022-04-20 17:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.40 [info     ] FQE_20220420173413: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00016216100272485766, 'time_algorithm_update': 0.0044589352473027285, 'loss': 0.01249307598708208, 'time_step': 0.004694619421231545, 'init_value': -2.1695804595947266, 'ave_value': -1.062358977775126, 'soft_opc': nan} step=6372




2022-04-20 17:34.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.41 [info     ] FQE_20220420173413: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00015474712781313448, 'time_algorithm_update': 0.00445141900057173, 'loss': 0.013458434172700776, 'time_step': 0.004676364909457622, 'init_value': -2.2074930667877197, 'ave_value': -1.109583651549085, 'soft_opc': nan} step=6549




2022-04-20 17:34.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.42 [info     ] FQE_20220420173413: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.0001517716100660421, 'time_algorithm_update': 0.003946791934428242, 'loss': 0.014040383960167735, 'time_step': 0.0041703302307990985, 'init_value': -2.2565903663635254, 'ave_value': -1.1786309947916964, 'soft_opc': nan} step=6726




2022-04-20 17:34.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.43 [info     ] FQE_20220420173413: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016178519038830772, 'time_algorithm_update': 0.00435261268400203, 'loss': 0.013930442989646, 'time_step': 0.004584907811913786, 'init_value': -2.345893621444702, 'ave_value': -1.295124923110053, 'soft_opc': nan} step=6903




2022-04-20 17:34.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.44 [info     ] FQE_20220420173413: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00015921242493020611, 'time_algorithm_update': 0.004430303465848589, 'loss': 0.01455810158789726, 'time_step': 0.004660828638884981, 'init_value': -2.282304525375366, 'ave_value': -1.2453775110142724, 'soft_opc': nan} step=7080




2022-04-20 17:34.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.45 [info     ] FQE_20220420173413: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00015946161949028403, 'time_algorithm_update': 0.0043636297775527175, 'loss': 0.015052635149009484, 'time_step': 0.004592432140630517, 'init_value': -2.2254741191864014, 'ave_value': -1.2401185288505108, 'soft_opc': nan} step=7257




2022-04-20 17:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.45 [info     ] FQE_20220420173413: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.0001562732761189089, 'time_algorithm_update': 0.0038454290163719047, 'loss': 0.015729006085187028, 'time_step': 0.004070190386583576, 'init_value': -2.2092173099517822, 'ave_value': -1.211826643799138, 'soft_opc': nan} step=7434




2022-04-20 17:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.46 [info     ] FQE_20220420173413: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00015963134119066142, 'time_algorithm_update': 0.004475492542072878, 'loss': 0.016907596627860674, 'time_step': 0.004706299237612277, 'init_value': -2.2563467025756836, 'ave_value': -1.3160650428806855, 'soft_opc': nan} step=7611




2022-04-20 17:34.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.47 [info     ] FQE_20220420173413: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00015713804859226034, 'time_algorithm_update': 0.004536543862294342, 'loss': 0.01728364693705982, 'time_step': 0.004761122040829416, 'init_value': -2.237875461578369, 'ave_value': -1.313304791556442, 'soft_opc': nan} step=7788




2022-04-20 17:34.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.48 [info     ] FQE_20220420173413: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00015371936862751588, 'time_algorithm_update': 0.0035484348986781922, 'loss': 0.01846319277662952, 'time_step': 0.0037750254916605976, 'init_value': -2.326462984085083, 'ave_value': -1.3961452970616854, 'soft_opc': nan} step=7965




2022-04-20 17:34.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.49 [info     ] FQE_20220420173413: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.0001567272143175373, 'time_algorithm_update': 0.004467494070193189, 'loss': 0.018399292366358184, 'time_step': 0.004692154415583206, 'init_value': -2.2789599895477295, 'ave_value': -1.3802544787643596, 'soft_opc': nan} step=8142




2022-04-20 17:34.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.50 [info     ] FQE_20220420173413: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015729295331879525, 'time_algorithm_update': 0.004510631669039106, 'loss': 0.018702753218374935, 'time_step': 0.004741202639994648, 'init_value': -2.2683417797088623, 'ave_value': -1.4043280961019946, 'soft_opc': nan} step=8319




2022-04-20 17:34.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.51 [info     ] FQE_20220420173413: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00015441307240286788, 'time_algorithm_update': 0.004029781804919916, 'loss': 0.01967901555758159, 'time_step': 0.004255752778996182, 'init_value': -2.2050108909606934, 'ave_value': -1.384741751407483, 'soft_opc': nan} step=8496




2022-04-20 17:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.51 [info     ] FQE_20220420173413: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.0001552145359879833, 'time_algorithm_update': 0.004240157240528172, 'loss': 0.020273245598115868, 'time_step': 0.004466412431102688, 'init_value': -2.2370998859405518, 'ave_value': -1.4562637697961238, 'soft_opc': nan} step=8673




2022-04-20 17:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 17:34.52 [info     ] FQE_20220420173413: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00015930671476374912, 'time_algorithm_update': 0.004466587540793553, 'loss': 0.02070701808542392, 'time_step': 0.004697573387016684, 'init_value': -2.2158100605010986, 'ave_value': -1.4420575747280395, 'soft_opc': nan} step=8850




2022-04-20 17:34.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173413/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 17:34.53 [debug    ] RoundIterator is selected.
2022-04-20 17:34.53 [info     ] Directory is created at d3rlpy_logs/FQE_20220420173453
2022-04-20 17:34.53 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:34.53 [debug    ] Building models...
2022-04-20 17:34.53 [debug    ] Models have been built.
2022-04-20 17:34.53 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420173453/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:34.55 [info     ] FQE_20220420173453: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.000161616608153942, 'time_algorithm_update': 0.004077132358107456, 'loss': 0.023112473922759988, 'time_step': 0.004310392363126888, 'init_value': -0.9318217039108276, 'ave_value': -0.944626410446457, 'soft_opc': nan} step=344




2022-04-20 17:34.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:34.56 [info     ] FQE_20220420173453: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016281840413115745, 'time_algorithm_update': 0.004200364961180576, 'loss': 0.02184955162526823, 'time_step': 0.004432005937709365, 'init_value': -1.7185404300689697, 'ave_value': -1.7353898158078795, 'soft_opc': nan} step=688




2022-04-20 17:34.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:34.58 [info     ] FQE_20220420173453: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016502238983331726, 'time_algorithm_update': 0.004469624092412549, 'loss': 0.0239136312265209, 'time_step': 0.004707527022029079, 'init_value': -2.546567440032959, 'ave_value': -2.5850168798018145, 'soft_opc': nan} step=1032




2022-04-20 17:34.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.00 [info     ] FQE_20220420173453: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016523031301276628, 'time_algorithm_update': 0.003995126069978226, 'loss': 0.02636133933840536, 'time_step': 0.004231592943502027, 'init_value': -3.212085723876953, 'ave_value': -3.298246104733364, 'soft_opc': nan} step=1376




2022-04-20 17:35.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.01 [info     ] FQE_20220420173453: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016609665959380394, 'time_algorithm_update': 0.0044707094514092735, 'loss': 0.031922450345984205, 'time_step': 0.00470721444418264, 'init_value': -4.005204200744629, 'ave_value': -4.185269824516129, 'soft_opc': nan} step=1720




2022-04-20 17:35.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.03 [info     ] FQE_20220420173453: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016681191533110862, 'time_algorithm_update': 0.003987048947533896, 'loss': 0.038542190902368274, 'time_step': 0.004226978435072788, 'init_value': -4.417868614196777, 'ave_value': -4.7075300104043505, 'soft_opc': nan} step=2064




2022-04-20 17:35.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.05 [info     ] FQE_20220420173453: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001647056535232899, 'time_algorithm_update': 0.004407685163409211, 'loss': 0.046449946751276595, 'time_step': 0.004646293645681337, 'init_value': -5.2153520584106445, 'ave_value': -5.603755408095884, 'soft_opc': nan} step=2408




2022-04-20 17:35.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.06 [info     ] FQE_20220420173453: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.000163958516231803, 'time_algorithm_update': 0.00425136782402216, 'loss': 0.05892786139427403, 'time_step': 0.004486446463784506, 'init_value': -5.51259708404541, 'ave_value': -6.047698742715088, 'soft_opc': nan} step=2752




2022-04-20 17:35.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.08 [info     ] FQE_20220420173453: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001641699047975762, 'time_algorithm_update': 0.0040659024271854135, 'loss': 0.06757176218034569, 'time_step': 0.004305739042370818, 'init_value': -5.894891738891602, 'ave_value': -6.548201046239686, 'soft_opc': nan} step=3096




2022-04-20 17:35.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.10 [info     ] FQE_20220420173453: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016612507576166199, 'time_algorithm_update': 0.004482031561607538, 'loss': 0.0837212075522646, 'time_step': 0.004721866097561148, 'init_value': -6.474213123321533, 'ave_value': -7.303825848046187, 'soft_opc': nan} step=3440




2022-04-20 17:35.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.11 [info     ] FQE_20220420173453: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016386564387831578, 'time_algorithm_update': 0.004059862951899684, 'loss': 0.09474656895766857, 'time_step': 0.004296435866245004, 'init_value': -6.827520847320557, 'ave_value': -7.78612857251554, 'soft_opc': nan} step=3784




2022-04-20 17:35.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.13 [info     ] FQE_20220420173453: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016588041948717693, 'time_algorithm_update': 0.004471236190130544, 'loss': 0.11305889268784762, 'time_step': 0.004707582468210265, 'init_value': -7.34303092956543, 'ave_value': -8.52105686859236, 'soft_opc': nan} step=4128




2022-04-20 17:35.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.15 [info     ] FQE_20220420173453: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016735598098400028, 'time_algorithm_update': 0.004011267146398855, 'loss': 0.1349176853397038, 'time_step': 0.004250455041264378, 'init_value': -7.689123153686523, 'ave_value': -9.046475469307588, 'soft_opc': nan} step=4472




2022-04-20 17:35.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.16 [info     ] FQE_20220420173453: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001651734806770502, 'time_algorithm_update': 0.004217588624288869, 'loss': 0.1523079562737325, 'time_step': 0.004451249227967373, 'init_value': -8.244173049926758, 'ave_value': -9.875134472468416, 'soft_opc': nan} step=4816




2022-04-20 17:35.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.18 [info     ] FQE_20220420173453: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016701013542885003, 'time_algorithm_update': 0.004397749900817871, 'loss': 0.17605187269577452, 'time_step': 0.004640715066776719, 'init_value': -8.396258354187012, 'ave_value': -10.229208606694666, 'soft_opc': nan} step=5160




2022-04-20 17:35.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.19 [info     ] FQE_20220420173453: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001653432846069336, 'time_algorithm_update': 0.0040077885916066725, 'loss': 0.20438546791326168, 'time_step': 0.004246318756147872, 'init_value': -9.124565124511719, 'ave_value': -11.12064575789211, 'soft_opc': nan} step=5504




2022-04-20 17:35.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.21 [info     ] FQE_20220420173453: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016510070756424304, 'time_algorithm_update': 0.0044353230054988415, 'loss': 0.22939438809272508, 'time_step': 0.004669416782467864, 'init_value': -9.219584465026855, 'ave_value': -11.461330056605181, 'soft_opc': nan} step=5848




2022-04-20 17:35.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.23 [info     ] FQE_20220420173453: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001664730005486067, 'time_algorithm_update': 0.00408963963042858, 'loss': 0.25207028542861865, 'time_step': 0.004328957823819892, 'init_value': -9.419271469116211, 'ave_value': -11.814078330268076, 'soft_opc': nan} step=6192




2022-04-20 17:35.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.25 [info     ] FQE_20220420173453: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016667815141899642, 'time_algorithm_update': 0.004510417234065921, 'loss': 0.2808114088541113, 'time_step': 0.004749388195747553, 'init_value': -9.659332275390625, 'ave_value': -12.248685313278793, 'soft_opc': nan} step=6536




2022-04-20 17:35.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.26 [info     ] FQE_20220420173453: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016686805458955987, 'time_algorithm_update': 0.004184839337371116, 'loss': 0.2982691443351985, 'time_step': 0.004425522892974144, 'init_value': -9.89040756225586, 'ave_value': -12.600917641669124, 'soft_opc': nan} step=6880




2022-04-20 17:35.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.28 [info     ] FQE_20220420173453: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016658389291097952, 'time_algorithm_update': 0.004128804040509601, 'loss': 0.32526776818341985, 'time_step': 0.0043700004732886025, 'init_value': -10.55887222290039, 'ave_value': -13.373283479236662, 'soft_opc': nan} step=7224




2022-04-20 17:35.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.30 [info     ] FQE_20220420173453: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016725201939427577, 'time_algorithm_update': 0.00448363742163015, 'loss': 0.35105441637380524, 'time_step': 0.004720987968666609, 'init_value': -10.77740478515625, 'ave_value': -13.58060955115162, 'soft_opc': nan} step=7568




2022-04-20 17:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.31 [info     ] FQE_20220420173453: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001621980999791345, 'time_algorithm_update': 0.0040636242822159165, 'loss': 0.38013766367573204, 'time_step': 0.004297542710636937, 'init_value': -11.084869384765625, 'ave_value': -13.873284810411468, 'soft_opc': nan} step=7912




2022-04-20 17:35.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.33 [info     ] FQE_20220420173453: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016695122386133947, 'time_algorithm_update': 0.0045148279777793, 'loss': 0.40129555133712846, 'time_step': 0.0047566648139510045, 'init_value': -11.624160766601562, 'ave_value': -14.38757517687066, 'soft_opc': nan} step=8256




2022-04-20 17:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.35 [info     ] FQE_20220420173453: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016370484995287517, 'time_algorithm_update': 0.004069725441378217, 'loss': 0.4240829148762968, 'time_step': 0.004306107066398443, 'init_value': -12.195093154907227, 'ave_value': -14.845278793590213, 'soft_opc': nan} step=8600




2022-04-20 17:35.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.36 [info     ] FQE_20220420173453: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016731578250264012, 'time_algorithm_update': 0.004191948923953744, 'loss': 0.45706626819446683, 'time_step': 0.004431491674378861, 'init_value': -12.659650802612305, 'ave_value': -15.24118457415759, 'soft_opc': nan} step=8944




2022-04-20 17:35.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.38 [info     ] FQE_20220420173453: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001655137816140818, 'time_algorithm_update': 0.004409160031828769, 'loss': 0.47915399168769635, 'time_step': 0.0046489273392876914, 'init_value': -13.409810066223145, 'ave_value': -15.824596108613862, 'soft_opc': nan} step=9288




2022-04-20 17:35.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.40 [info     ] FQE_20220420173453: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016747380411902138, 'time_algorithm_update': 0.0039710929227429765, 'loss': 0.5088386316946166, 'time_step': 0.004213461349176806, 'init_value': -14.426077842712402, 'ave_value': -16.53956617757485, 'soft_opc': nan} step=9632




2022-04-20 17:35.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.41 [info     ] FQE_20220420173453: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016828331836434297, 'time_algorithm_update': 0.00450907405032668, 'loss': 0.5396612179519739, 'time_step': 0.004751956047013749, 'init_value': -14.793116569519043, 'ave_value': -16.71878275621141, 'soft_opc': nan} step=9976




2022-04-20 17:35.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.43 [info     ] FQE_20220420173453: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016479228818139366, 'time_algorithm_update': 0.004019223673399104, 'loss': 0.566617886963582, 'time_step': 0.004255885301634323, 'init_value': -15.797279357910156, 'ave_value': -17.716480830262064, 'soft_opc': nan} step=10320




2022-04-20 17:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.45 [info     ] FQE_20220420173453: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016356762065443883, 'time_algorithm_update': 0.0044527039971462515, 'loss': 0.5971505248507615, 'time_step': 0.004689619291660397, 'init_value': -16.215187072753906, 'ave_value': -17.896367456452072, 'soft_opc': nan} step=10664




2022-04-20 17:35.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.46 [info     ] FQE_20220420173453: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016407495321229447, 'time_algorithm_update': 0.004135640554649885, 'loss': 0.6157292469707859, 'time_step': 0.004374905381091806, 'init_value': -16.5820255279541, 'ave_value': -18.125164546436622, 'soft_opc': nan} step=11008




2022-04-20 17:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.48 [info     ] FQE_20220420173453: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016340959903805754, 'time_algorithm_update': 0.004068683053171912, 'loss': 0.6394314311531394, 'time_step': 0.004304865765017133, 'init_value': -17.346532821655273, 'ave_value': -18.630223162608694, 'soft_opc': nan} step=11352




2022-04-20 17:35.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.50 [info     ] FQE_20220420173453: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016554912855458814, 'time_algorithm_update': 0.004492458909056907, 'loss': 0.6600412568997939, 'time_step': 0.004732851372208706, 'init_value': -17.60235023498535, 'ave_value': -18.908770100868868, 'soft_opc': nan} step=11696




2022-04-20 17:35.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.51 [info     ] FQE_20220420173453: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001626007778700008, 'time_algorithm_update': 0.004026300685350285, 'loss': 0.6889342094316732, 'time_step': 0.004261594872141994, 'init_value': -17.968582153320312, 'ave_value': -19.106383744191046, 'soft_opc': nan} step=12040




2022-04-20 17:35.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.53 [info     ] FQE_20220420173453: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016300276268360226, 'time_algorithm_update': 0.0044624985650528306, 'loss': 0.7016690174485881, 'time_step': 0.004700183868408203, 'init_value': -18.51949119567871, 'ave_value': -19.439318644298716, 'soft_opc': nan} step=12384




2022-04-20 17:35.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.55 [info     ] FQE_20220420173453: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001655914062677428, 'time_algorithm_update': 0.004131384367166564, 'loss': 0.7234362231589161, 'time_step': 0.004369317792182745, 'init_value': -18.87225341796875, 'ave_value': -19.595816117052728, 'soft_opc': nan} step=12728




2022-04-20 17:35.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.56 [info     ] FQE_20220420173453: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001688086709310842, 'time_algorithm_update': 0.004382623489512954, 'loss': 0.7237044277947483, 'time_step': 0.004627453726391459, 'init_value': -19.358911514282227, 'ave_value': -20.05754609311226, 'soft_opc': nan} step=13072




2022-04-20 17:35.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:35.58 [info     ] FQE_20220420173453: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016506466754647188, 'time_algorithm_update': 0.0043277504832245585, 'loss': 0.73313843927801, 'time_step': 0.004565417766571045, 'init_value': -19.65709686279297, 'ave_value': -20.16522109847325, 'soft_opc': nan} step=13416




2022-04-20 17:35.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.00 [info     ] FQE_20220420173453: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016486367513967115, 'time_algorithm_update': 0.004043686528538548, 'loss': 0.7597524446684347, 'time_step': 0.004280614298443461, 'init_value': -19.963165283203125, 'ave_value': -20.47053685857283, 'soft_opc': nan} step=13760




2022-04-20 17:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.01 [info     ] FQE_20220420173453: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016329732052115507, 'time_algorithm_update': 0.004432256831679233, 'loss': 0.7677062142174691, 'time_step': 0.004668219144954238, 'init_value': -20.392677307128906, 'ave_value': -20.919659686204977, 'soft_opc': nan} step=14104




2022-04-20 17:36.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.03 [info     ] FQE_20220420173453: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001638296038605446, 'time_algorithm_update': 0.004051786522532619, 'loss': 0.7873890130444928, 'time_step': 0.004286656546038251, 'init_value': -20.59563446044922, 'ave_value': -20.90357942590427, 'soft_opc': nan} step=14448




2022-04-20 17:36.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.05 [info     ] FQE_20220420173453: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016088056009869243, 'time_algorithm_update': 0.004497566195421441, 'loss': 0.8192517684913392, 'time_step': 0.004731477693069813, 'init_value': -21.09564971923828, 'ave_value': -21.245902590537106, 'soft_opc': nan} step=14792




2022-04-20 17:36.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.06 [info     ] FQE_20220420173453: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001645476319069086, 'time_algorithm_update': 0.004123425760934519, 'loss': 0.8385046622445157, 'time_step': 0.004361359879027965, 'init_value': -21.459671020507812, 'ave_value': -21.686078958808622, 'soft_opc': nan} step=15136




2022-04-20 17:36.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.08 [info     ] FQE_20220420173453: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016123264334922614, 'time_algorithm_update': 0.004164979208347409, 'loss': 0.8582947898093004, 'time_step': 0.004399398731630902, 'init_value': -21.755992889404297, 'ave_value': -21.9952377117068, 'soft_opc': nan} step=15480




2022-04-20 17:36.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.10 [info     ] FQE_20220420173453: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016764152881711028, 'time_algorithm_update': 0.004470398952794629, 'loss': 0.8710663666039012, 'time_step': 0.004712158857389938, 'init_value': -22.17462158203125, 'ave_value': -22.538270945028668, 'soft_opc': nan} step=15824




2022-04-20 17:36.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.11 [info     ] FQE_20220420173453: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001653897207836772, 'time_algorithm_update': 0.004085836715476457, 'loss': 0.8655426949912379, 'time_step': 0.004324973322624384, 'init_value': -22.281373977661133, 'ave_value': -22.515738379962006, 'soft_opc': nan} step=16168




2022-04-20 17:36.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.13 [info     ] FQE_20220420173453: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016613616499789926, 'time_algorithm_update': 0.004440810098204502, 'loss': 0.8768798861067829, 'time_step': 0.004680478295614553, 'init_value': -22.77136993408203, 'ave_value': -22.984307989708004, 'soft_opc': nan} step=16512




2022-04-20 17:36.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.15 [info     ] FQE_20220420173453: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016729845557101938, 'time_algorithm_update': 0.004068253345267717, 'loss': 0.9242335774957441, 'time_step': 0.00430899650551552, 'init_value': -22.968425750732422, 'ave_value': -23.14141566584239, 'soft_opc': nan} step=16856




2022-04-20 17:36.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:36.16 [info     ] FQE_20220420173453: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016772816347521404, 'time_algorithm_update': 0.004373121400212132, 'loss': 0.9300288624173507, 'time_step': 0.004616183596988057, 'init_value': -23.492645263671875, 'ave_value': -23.526170969818224, 'soft_opc': nan} step=17200




2022-04-20 17:36.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420173453/model_17200.pt
search iteration:  14
using hyper params:  [0.003653218936258894, 0.009970104610734793, 8.122707832917991e-05, 5]
2022-04-20 17:36.16 [debug    ] RoundIterator is selected.
2022-04-20 17:36.16 [info     ] Directory is created at d3rlpy_logs/CQL_20220420173616
2022-04-20 17:36.16 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:36.16 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:36.16 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420173616/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.003653218936258894, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.25 [info     ] CQL_20220420173616: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00035534964667426213, 'time_algorithm_update': 0.025256416253876268, 'temp_loss': 4.428400793967889, 'temp': 0.9863594141271379, 'alpha_loss': -12.579355728556537, 'alpha': 1.015048651318801, 'critic_loss': 32.221551962066115, 'actor_loss': 4.862531428797203, 'time_step': 0.02570895981370357, 'td_error': 6.077907904141159, 'init_value': -12.117429733276367, 'ave_value': -7.578389509667281} step=342
2022-04-20 17:36.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.35 [info     ] CQL_20220420173616: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000360582307068228, 'time_algorithm_update': 0.025477624776070577, 'temp_loss': 3.129391756671214, 'temp': 0.9626274185571057, 'alpha_loss': -1.7112080846880482, 'alpha': 1.0299941364087557, 'critic_loss': 35.124129908823825, 'actor_loss': 12.135620460175632, 'time_step': 0.02593481331540827, 'td_error': 7.742658115193638, 'init_value': -20.960948944091797, 'ave_value': -12.121287017605594} step=684
2022-04-20 17:36.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.44 [info     ] CQL_20220420173616: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003468802100733707, 'time_algorithm_update': 0.0247387000691821, 'temp_loss': 2.2034408328128836, 'temp': 0.9438961400971775, 'alpha_loss': 3.055018235085129, 'alpha': 1.0260952062774122, 'critic_loss': 64.34490433631585, 'actor_loss': 18.76001434995417, 'time_step': 0.025181381325972706, 'td_error': 12.19776728411643, 'init_value': -31.188350677490234, 'ave_value': -17.116474850831537} step=1026
2022-04-20 17:36.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:36.53 [info     ] CQL_20220420173616: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003397834231281838, 'time_algorithm_update': 0.024451006922805517, 'temp_loss': 1.6671336458788977, 'temp': 0.9279062801982925, 'alpha_loss': 6.349852003549275, 'alpha': 1.002208142426976, 'critic_loss': 105.4501777559693, 'actor_loss': 25.350270990739787, 'time_step': 0.024882405822040043, 'td_error': 19.755601395085694, 'init_value': -41.01020812988281, 'ave_value': -23.542829515366808} step=1368
2022-04-20 17:36.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.02 [info     ] CQL_20220420173616: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00036199120750204163, 'time_algorithm_update': 0.02523796181929739, 'temp_loss': 1.2535263747848264, 'temp': 0.9140262591559984, 'alpha_loss': 8.679014876572012, 'alpha': 0.9651585857422031, 'critic_loss': 152.4014464261239, 'actor_loss': 31.85276466503478, 'time_step': 0.02569733446801615, 'td_error': 28.283988967786684, 'init_value': -53.728858947753906, 'ave_value': -30.07678049335642} step=1710
2022-04-20 17:37.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.11 [info     ] CQL_20220420173616: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003588757319757116, 'time_algorithm_update': 0.023860247511612743, 'temp_loss': 0.8839422776772264, 'temp': 0.9020847261649126, 'alpha_loss': 10.508094058399312, 'alpha': 0.9246653612942723, 'critic_loss': 207.2675318132367, 'actor_loss': 38.47317883564018, 'time_step': 0.02431661761992159, 'td_error': 37.54070698649386, 'init_value': -63.431968688964844, 'ave_value': -34.389266260013926} step=2052
2022-04-20 17:37.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.19 [info     ] CQL_20220420173616: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00032922329261289004, 'time_algorithm_update': 0.022336430019802518, 'temp_loss': 0.6835142113377302, 'temp': 0.8917684966360616, 'alpha_loss': 10.828128280695418, 'alpha': 0.8867701659774223, 'critic_loss': 265.04449221962375, 'actor_loss': 44.475491540473804, 'time_step': 0.02275801820364612, 'td_error': 45.52518868861425, 'init_value': -74.2926254272461, 'ave_value': -40.522740232244736} step=2394
2022-04-20 17:37.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.27 [info     ] CQL_20220420173616: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003621104167915924, 'time_algorithm_update': 0.02384378198991742, 'temp_loss': 0.4731026201111357, 'temp': 0.883261787961101, 'alpha_loss': 11.56737652856704, 'alpha': 0.8515766236865729, 'critic_loss': 319.77537893551835, 'actor_loss': 50.21921475728353, 'time_step': 0.024303449524773493, 'td_error': 60.655925365742874, 'init_value': -82.92997741699219, 'ave_value': -44.3099783009727} step=2736
2022-04-20 17:37.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.36 [info     ] CQL_20220420173616: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003489423216434947, 'time_algorithm_update': 0.024006201509843794, 'temp_loss': 0.33926726060739737, 'temp': 0.8762517928728584, 'alpha_loss': 11.787124718838966, 'alpha': 0.8187039449200992, 'critic_loss': 372.121797885114, 'actor_loss': 55.414177833244814, 'time_step': 0.02445313317036768, 'td_error': 83.66192741107214, 'init_value': -90.49507141113281, 'ave_value': -48.39351695247867} step=3078
2022-04-20 17:37.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.45 [info     ] CQL_20220420173616: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035302959687528556, 'time_algorithm_update': 0.023743190960577358, 'temp_loss': 0.1857753227633691, 'temp': 0.8709360327985551, 'alpha_loss': 12.2164936023846, 'alpha': 0.7880103385936447, 'critic_loss': 420.10683311774716, 'actor_loss': 60.14707770542792, 'time_step': 0.024196435833535, 'td_error': 62.338233187301086, 'init_value': -103.19319152832031, 'ave_value': -55.61985954506642} step=3420
2022-04-20 17:37.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:37.53 [info     ] CQL_20220420173616: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00035849370454487045, 'time_algorithm_update': 0.02373074369820935, 'temp_loss': 0.08716154477575369, 'temp': 0.8677960421606811, 'alpha_loss': 11.728702257948312, 'alpha': 0.7597488103902827, 'critic_loss': 463.9630296495226, 'actor_loss': 64.36609106454236, 'time_step': 0.02418786182738187, 'td_error': 65.10947553340692, 'init_value': -104.541015625, 'ave_value': -56.594940292167365} step=3762
2022-04-20 17:37.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.02 [info     ] CQL_20220420173616: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003585188012374075, 'time_algorithm_update': 0.023842593382673655, 'temp_loss': 0.014243114195023363, 'temp': 0.8662099099298667, 'alpha_loss': 11.2111015682332, 'alpha': 0.7343270991280763, 'critic_loss': 500.58791275470577, 'actor_loss': 68.04941441719993, 'time_step': 0.024299012987237228, 'td_error': 90.04845109603094, 'init_value': -113.51863861083984, 'ave_value': -61.43941018506683} step=4104
2022-04-20 17:38.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.11 [info     ] CQL_20220420173616: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035962375283938404, 'time_algorithm_update': 0.02400256946072941, 'temp_loss': 0.014524887462979868, 'temp': 0.8658622403939565, 'alpha_loss': 11.365409052162839, 'alpha': 0.7101655705281865, 'critic_loss': 538.8557501006545, 'actor_loss': 71.79249016165036, 'time_step': 0.024461205939800418, 'td_error': 120.6042658679634, 'init_value': -120.91646575927734, 'ave_value': -63.04260106630355} step=4446
2022-04-20 17:38.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.20 [info     ] CQL_20220420173616: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035469922405934474, 'time_algorithm_update': 0.023964428762246293, 'temp_loss': -0.06860481339849923, 'temp': 0.8668550533509394, 'alpha_loss': 11.37207669960825, 'alpha': 0.6863176866233001, 'critic_loss': 568.4908759580021, 'actor_loss': 75.00367338615551, 'time_step': 0.02441768827494125, 'td_error': 86.81292404579582, 'init_value': -124.14534759521484, 'ave_value': -66.78156480083527} step=4788
2022-04-20 17:38.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.28 [info     ] CQL_20220420173616: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035259946745041517, 'time_algorithm_update': 0.02401866550334016, 'temp_loss': -0.05381385762549457, 'temp': 0.8701277762128595, 'alpha_loss': 10.76599074943721, 'alpha': 0.6639173192936078, 'critic_loss': 595.6526484796178, 'actor_loss': 77.79384222644114, 'time_step': 0.024472993019728634, 'td_error': 78.76844932817231, 'init_value': -130.7366943359375, 'ave_value': -70.40215976506471} step=5130
2022-04-20 17:38.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.37 [info     ] CQL_20220420173616: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003576606337787115, 'time_algorithm_update': 0.023993780041298672, 'temp_loss': -0.08008532066508169, 'temp': 0.8728144336165043, 'alpha_loss': 11.059714286648042, 'alpha': 0.6424759723986798, 'critic_loss': 615.3719967847679, 'actor_loss': 80.19577470299794, 'time_step': 0.024446545985707065, 'td_error': 143.57591567662018, 'init_value': -134.5481719970703, 'ave_value': -71.64875450243888} step=5472
2022-04-20 17:38.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.46 [info     ] CQL_20220420173616: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003503435536434776, 'time_algorithm_update': 0.024098621474372015, 'temp_loss': -0.08183319862844826, 'temp': 0.8769490900095444, 'alpha_loss': 10.656579603228653, 'alpha': 0.6217819506313369, 'critic_loss': 638.1284891764323, 'actor_loss': 82.56180262426187, 'time_step': 0.024547249950163545, 'td_error': 101.31387636288109, 'init_value': -136.35409545898438, 'ave_value': -72.28401261185472} step=5814
2022-04-20 17:38.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:38.55 [info     ] CQL_20220420173616: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.000354875598037452, 'time_algorithm_update': 0.02397999219727098, 'temp_loss': -0.10333564763137124, 'temp': 0.883079925824327, 'alpha_loss': 10.551270745651067, 'alpha': 0.6016884432201497, 'critic_loss': 649.8084475868627, 'actor_loss': 84.54456581428038, 'time_step': 0.024432205317313212, 'td_error': 81.1475986078877, 'init_value': -140.67276000976562, 'ave_value': -76.19044395436104} step=6156
2022-04-20 17:38.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.03 [info     ] CQL_20220420173616: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035966139787818954, 'time_algorithm_update': 0.023884957993936817, 'temp_loss': -0.0953276145288296, 'temp': 0.8889541456922453, 'alpha_loss': 9.963416003344351, 'alpha': 0.5828583111539919, 'critic_loss': 662.5243449964022, 'actor_loss': 86.19951466789023, 'time_step': 0.02434379176089638, 'td_error': 128.98179567359347, 'init_value': -141.159912109375, 'ave_value': -77.34122645001086} step=6498
2022-04-20 17:39.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.12 [info     ] CQL_20220420173616: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.000349806763275325, 'time_algorithm_update': 0.023853828335366055, 'temp_loss': -0.11217951672750781, 'temp': 0.896396446472023, 'alpha_loss': 9.722580250243695, 'alpha': 0.5647759514245373, 'critic_loss': 669.6367730034722, 'actor_loss': 87.48211942081564, 'time_step': 0.024298623988502903, 'td_error': 82.00427482973458, 'init_value': -145.21876525878906, 'ave_value': -78.01170145986048} step=6840
2022-04-20 17:39.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.21 [info     ] CQL_20220420173616: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035395329458671706, 'time_algorithm_update': 0.024005779745983103, 'temp_loss': -0.07312118549618804, 'temp': 0.9024397526916704, 'alpha_loss': 9.46747673045822, 'alpha': 0.5474468586389084, 'critic_loss': 675.2439232541803, 'actor_loss': 88.8008185046458, 'time_step': 0.02446000617847108, 'td_error': 132.4720722388163, 'init_value': -145.7154541015625, 'ave_value': -79.51301283567406} step=7182
2022-04-20 17:39.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.30 [info     ] CQL_20220420173616: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00034912148414299504, 'time_algorithm_update': 0.024577563966226856, 'temp_loss': -0.08668339263500743, 'temp': 0.9088168925012065, 'alpha_loss': 9.584853430240475, 'alpha': 0.530172125986445, 'critic_loss': 682.8595233047217, 'actor_loss': 90.19508462203176, 'time_step': 0.025024291367558706, 'td_error': 121.1295404999821, 'init_value': -147.8713836669922, 'ave_value': -80.85935557495232} step=7524
2022-04-20 17:39.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.39 [info     ] CQL_20220420173616: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003515607432315224, 'time_algorithm_update': 0.025449566673814206, 'temp_loss': -0.09476589155519566, 'temp': 0.9146790596825337, 'alpha_loss': 9.466354692191409, 'alpha': 0.5134443778392167, 'critic_loss': 689.7395012392635, 'actor_loss': 91.17134129931355, 'time_step': 0.025900019539727107, 'td_error': 136.62497738401208, 'init_value': -145.65640258789062, 'ave_value': -81.0180759840556} step=7866
2022-04-20 17:39.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.48 [info     ] CQL_20220420173616: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.000357770780373735, 'time_algorithm_update': 0.025081901522407756, 'temp_loss': -0.062126957560883975, 'temp': 0.9209006878367642, 'alpha_loss': 9.332273842995626, 'alpha': 0.4971216985008173, 'critic_loss': 696.4328293828239, 'actor_loss': 92.16710232294093, 'time_step': 0.025537927248324568, 'td_error': 156.75102149620213, 'init_value': -148.47454833984375, 'ave_value': -81.63291925826589} step=8208
2022-04-20 17:39.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:39.58 [info     ] CQL_20220420173616: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00035786977288318657, 'time_algorithm_update': 0.02567210392645228, 'temp_loss': -0.029534098523402073, 'temp': 0.9263811989834434, 'alpha_loss': 9.044302630842777, 'alpha': 0.4813469236182888, 'critic_loss': 702.3104215923108, 'actor_loss': 93.12887912326389, 'time_step': 0.026127810366669592, 'td_error': 130.83391492102174, 'init_value': -151.17294311523438, 'ave_value': -85.08238963941398} step=8550
2022-04-20 17:39.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.07 [info     ] CQL_20220420173616: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00036161336285329005, 'time_algorithm_update': 0.025407525531032628, 'temp_loss': 0.025644640590755913, 'temp': 0.9255330180912688, 'alpha_loss': 8.619053560390807, 'alpha': 0.4665353145689992, 'critic_loss': 715.9690874780131, 'actor_loss': 94.26603663037395, 'time_step': 0.025866191289578264, 'td_error': 137.41171154966588, 'init_value': -151.064208984375, 'ave_value': -86.6858524764836} step=8892
2022-04-20 17:40.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.16 [info     ] CQL_20220420173616: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003523415292215626, 'time_algorithm_update': 0.025205198087190326, 'temp_loss': -0.05610978355554984, 'temp': 0.9263833749015429, 'alpha_loss': 9.112568350563272, 'alpha': 0.4516298096430929, 'critic_loss': 717.2678758052357, 'actor_loss': 94.66525687948305, 'time_step': 0.025654457465947023, 'td_error': 217.75857731340415, 'init_value': -147.06698608398438, 'ave_value': -83.87945400480852} step=9234
2022-04-20 17:40.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.25 [info     ] CQL_20220420173616: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036145929704632673, 'time_algorithm_update': 0.02559312951495076, 'temp_loss': -0.0009874464639010485, 'temp': 0.928860016036452, 'alpha_loss': 8.9100271735275, 'alpha': 0.43707589369419725, 'critic_loss': 720.7347272906387, 'actor_loss': 95.38458327800907, 'time_step': 0.026051699766638684, 'td_error': 181.03566842926722, 'init_value': -147.00051879882812, 'ave_value': -84.09052686455483} step=9576
2022-04-20 17:40.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.35 [info     ] CQL_20220420173616: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003606617799279285, 'time_algorithm_update': 0.025343364442301074, 'temp_loss': 0.008506405347010545, 'temp': 0.927828594257957, 'alpha_loss': 8.327468951543173, 'alpha': 0.4232046936164823, 'critic_loss': 723.0620936343545, 'actor_loss': 95.91786776090923, 'time_step': 0.02580101099627757, 'td_error': 195.35811118475655, 'init_value': -147.69259643554688, 'ave_value': -86.9916593001011} step=9918
2022-04-20 17:40.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.44 [info     ] CQL_20220420173616: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003583605526483547, 'time_algorithm_update': 0.025506260102255304, 'temp_loss': 0.012630099615855523, 'temp': 0.9282783611476073, 'alpha_loss': 8.521815199601022, 'alpha': 0.4099165170687681, 'critic_loss': 725.6558241816292, 'actor_loss': 96.28984462448031, 'time_step': 0.02596196584534227, 'td_error': 239.78151852540284, 'init_value': -148.3251495361328, 'ave_value': -86.79405873335462} step=10260
2022-04-20 17:40.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:40.53 [info     ] CQL_20220420173616: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003608681305110106, 'time_algorithm_update': 0.025045864066185308, 'temp_loss': 0.024477860558102704, 'temp': 0.9253311059628314, 'alpha_loss': 8.566359992612872, 'alpha': 0.3967724813703905, 'critic_loss': 727.9439604463633, 'actor_loss': 96.77339071976512, 'time_step': 0.02550537683810407, 'td_error': 164.0782590836668, 'init_value': -146.2146759033203, 'ave_value': -86.24726278015353} step=10602
2022-04-20 17:40.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.02 [info     ] CQL_20220420173616: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035722213878966214, 'time_algorithm_update': 0.02557771875147234, 'temp_loss': 0.00023732607781189923, 'temp': 0.92428688469686, 'alpha_loss': 8.566939765249776, 'alpha': 0.3838915554752127, 'critic_loss': 729.8905516507333, 'actor_loss': 97.10690588700145, 'time_step': 0.026032085307160314, 'td_error': 327.6853030367947, 'init_value': -145.2931671142578, 'ave_value': -86.70092771531695} step=10944
2022-04-20 17:41.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.12 [info     ] CQL_20220420173616: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00035896426752993936, 'time_algorithm_update': 0.02529519203810664, 'temp_loss': 0.05471874194012748, 'temp': 0.9232238907563058, 'alpha_loss': 8.231709155422902, 'alpha': 0.37163838874875454, 'critic_loss': 732.4379406309964, 'actor_loss': 97.44056462962725, 'time_step': 0.025751919077153792, 'td_error': 232.96857153724707, 'init_value': -148.39662170410156, 'ave_value': -87.07592853109243} step=11286
2022-04-20 17:41.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.21 [info     ] CQL_20220420173616: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003572876690423977, 'time_algorithm_update': 0.02529973663084688, 'temp_loss': 0.0442644608866053, 'temp': 0.9169458393116443, 'alpha_loss': 7.8329932034364225, 'alpha': 0.35998713830758255, 'critic_loss': 731.6738013551947, 'actor_loss': 97.62960088183308, 'time_step': 0.025757013008608456, 'td_error': 158.83425176130305, 'init_value': -142.15699768066406, 'ave_value': -85.71382302378346} step=11628
2022-04-20 17:41.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.30 [info     ] CQL_20220420173616: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.000358615005225466, 'time_algorithm_update': 0.025303477432295593, 'temp_loss': 0.030712075591392336, 'temp': 0.9145077446050811, 'alpha_loss': 7.983531632618598, 'alpha': 0.34874249097199467, 'critic_loss': 729.2676057871322, 'actor_loss': 97.77942128767047, 'time_step': 0.025760960857770597, 'td_error': 239.90709817825729, 'init_value': -145.33338928222656, 'ave_value': -86.82443839533539} step=11970
2022-04-20 17:41.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.39 [info     ] CQL_20220420173616: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003609427234582734, 'time_algorithm_update': 0.025647305605704326, 'temp_loss': 0.04493883898078698, 'temp': 0.9120523884282474, 'alpha_loss': 7.111866942623205, 'alpha': 0.3381161540746689, 'critic_loss': 728.3976069221719, 'actor_loss': 97.82714990983929, 'time_step': 0.026105620010554442, 'td_error': 172.2243875535106, 'init_value': -141.5232391357422, 'ave_value': -86.96808998760307} step=12312
2022-04-20 17:41.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.49 [info     ] CQL_20220420173616: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035937418017471047, 'time_algorithm_update': 0.025083049696091323, 'temp_loss': 0.06290106996567103, 'temp': 0.9078986470462286, 'alpha_loss': 7.019178122107745, 'alpha': 0.3282966980634377, 'critic_loss': 725.2366979052449, 'actor_loss': 97.72783225879334, 'time_step': 0.02554364999135335, 'td_error': 213.97552334614073, 'init_value': -144.0912322998047, 'ave_value': -88.0356877235652} step=12654
2022-04-20 17:41.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:41.58 [info     ] CQL_20220420173616: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035642462167126393, 'time_algorithm_update': 0.025616281213816147, 'temp_loss': 0.0736242975462337, 'temp': 0.9017735853878378, 'alpha_loss': 6.8732759813119095, 'alpha': 0.31826817701783094, 'critic_loss': 722.650651184439, 'actor_loss': 97.74706696627433, 'time_step': 0.02606969270092702, 'td_error': 140.7217692597345, 'init_value': -143.1881866455078, 'ave_value': -87.9635822301066} step=12996
2022-04-20 17:41.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.07 [info     ] CQL_20220420173616: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035169040947629693, 'time_algorithm_update': 0.025145647818582098, 'temp_loss': 0.09822518877924709, 'temp': 0.8940534616074367, 'alpha_loss': 5.988843731712877, 'alpha': 0.3091530218633295, 'critic_loss': 718.4387069612916, 'actor_loss': 97.51734625386914, 'time_step': 0.02559703205064026, 'td_error': 117.27781455278792, 'init_value': -139.7736053466797, 'ave_value': -87.29338663653176} step=13338
2022-04-20 17:42.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.16 [info     ] CQL_20220420173616: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003635674192194353, 'time_algorithm_update': 0.02552877810963413, 'temp_loss': 0.06412468797783231, 'temp': 0.886484532328377, 'alpha_loss': 5.6346299418231895, 'alpha': 0.300838305587657, 'critic_loss': 708.870581732856, 'actor_loss': 97.17640045790644, 'time_step': 0.025990155705234462, 'td_error': 102.38689105183667, 'init_value': -141.8545379638672, 'ave_value': -88.17257154284599} step=13680
2022-04-20 17:42.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.26 [info     ] CQL_20220420173616: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035832360473989745, 'time_algorithm_update': 0.02544246152130484, 'temp_loss': 0.09377700948205434, 'temp': 0.8795886703750544, 'alpha_loss': 5.417965595485174, 'alpha': 0.29262627757083604, 'critic_loss': 701.3376959192823, 'actor_loss': 96.93958235623543, 'time_step': 0.025898489338612697, 'td_error': 114.32263065474847, 'init_value': -138.58786010742188, 'ave_value': -86.85281180639323} step=14022
2022-04-20 17:42.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.34 [info     ] CQL_20220420173616: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00035306027061060856, 'time_algorithm_update': 0.023859461845710264, 'temp_loss': 0.09409146643194713, 'temp': 0.8729074979037569, 'alpha_loss': 5.180328542726082, 'alpha': 0.28440617887597336, 'critic_loss': 695.3451225771541, 'actor_loss': 96.64159342001753, 'time_step': 0.02431034205252664, 'td_error': 87.83576030921225, 'init_value': -135.77084350585938, 'ave_value': -85.73468314550716} step=14364
2022-04-20 17:42.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.43 [info     ] CQL_20220420173616: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003566992910284745, 'time_algorithm_update': 0.024026866544756973, 'temp_loss': 0.12273421623793088, 'temp': 0.8636629994501147, 'alpha_loss': 4.7522338775166295, 'alpha': 0.27667963487363, 'critic_loss': 691.2229064584476, 'actor_loss': 96.50183906332094, 'time_step': 0.024480765325981275, 'td_error': 125.58948538025938, 'init_value': -136.78012084960938, 'ave_value': -86.658511808672} step=14706
2022-04-20 17:42.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:42.52 [info     ] CQL_20220420173616: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003552144033867016, 'time_algorithm_update': 0.02423104138402214, 'temp_loss': 0.09048722430039742, 'temp': 0.8536179389869958, 'alpha_loss': 4.800038608891225, 'alpha': 0.269069124383536, 'critic_loss': 681.1954900730423, 'actor_loss': 96.1552023859749, 'time_step': 0.024684654341803655, 'td_error': 88.07293775184532, 'init_value': -132.66281127929688, 'ave_value': -84.63952155348383} step=15048
2022-04-20 17:42.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.01 [info     ] CQL_20220420173616: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00036296509859854717, 'time_algorithm_update': 0.0240185692993521, 'temp_loss': 0.12967762908195718, 'temp': 0.8447343910995283, 'alpha_loss': 4.408753917928328, 'alpha': 0.261315278926788, 'critic_loss': 673.6213759037487, 'actor_loss': 95.77933986284579, 'time_step': 0.024479115218446965, 'td_error': 107.74570069198315, 'init_value': -137.29953002929688, 'ave_value': -86.7350536980349} step=15390
2022-04-20 17:43.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.09 [info     ] CQL_20220420173616: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003564748150563379, 'time_algorithm_update': 0.024014543371590955, 'temp_loss': 0.1085127089227065, 'temp': 0.8357998531812812, 'alpha_loss': 4.116257601314121, 'alpha': 0.25430517752616727, 'critic_loss': 661.2959689313208, 'actor_loss': 95.30015251650448, 'time_step': 0.024470219138072947, 'td_error': 99.81529629937553, 'init_value': -132.54103088378906, 'ave_value': -85.2936584038368} step=15732
2022-04-20 17:43.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.18 [info     ] CQL_20220420173616: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00036111630891498764, 'time_algorithm_update': 0.023776770335191873, 'temp_loss': 0.11599905843361777, 'temp': 0.8273157520949492, 'alpha_loss': 3.7580299739949186, 'alpha': 0.24749539997319728, 'critic_loss': 653.4430260016904, 'actor_loss': 95.12073324995431, 'time_step': 0.024237584649470813, 'td_error': 96.36690807056536, 'init_value': -133.0487060546875, 'ave_value': -84.41565863398885} step=16074
2022-04-20 17:43.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.27 [info     ] CQL_20220420173616: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00035952824598167374, 'time_algorithm_update': 0.02418704897339581, 'temp_loss': 0.1441897912528257, 'temp': 0.8164175778453113, 'alpha_loss': 3.4357381665218645, 'alpha': 0.24097232594650392, 'critic_loss': 647.5790876422012, 'actor_loss': 94.78275310226351, 'time_step': 0.024646755547551382, 'td_error': 68.46628976191059, 'init_value': -131.47048950195312, 'ave_value': -84.29189038469885} step=16416
2022-04-20 17:43.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.36 [info     ] CQL_20220420173616: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00036566926721940963, 'time_algorithm_update': 0.02426456986812123, 'temp_loss': 0.1321095653907641, 'temp': 0.8060951192825161, 'alpha_loss': 3.0990162554540133, 'alpha': 0.2347913587476775, 'critic_loss': 636.1003603572734, 'actor_loss': 94.23414701049091, 'time_step': 0.024728945821349383, 'td_error': 58.318411807524534, 'init_value': -126.09513092041016, 'ave_value': -82.64620076723263} step=16758
2022-04-20 17:43.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:43.45 [info     ] CQL_20220420173616: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00035974644778067607, 'time_algorithm_update': 0.02428272174812897, 'temp_loss': 0.1359527850011636, 'temp': 0.7957826823170422, 'alpha_loss': 2.8944096993981745, 'alpha': 0.22902956583172257, 'critic_loss': 621.4805162217882, 'actor_loss': 93.48622428046332, 'time_step': 0.02474207418006763, 'td_error': 58.82453927589287, 'init_value': -128.0666961669922, 'ave_value': -83.08077276124577} step=17100
2022-04-20 17:43.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420173616/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:43.46 [info     ] FQE_20220420174345: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015367800930896438, 'time_algorithm_update': 0.0035800890750195607, 'loss': 0.007152264013448842, 'time_step': 0.0038082872528627694, 'init_value': -0.09811170399188995, 'ave_value': -0.025470742528737933, 'soft_opc': nan} step=166




2022-04-20 17:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.46 [info     ] FQE_20220420174345: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015581372272537416, 'time_algorithm_update': 0.0034316634557333336, 'loss': 0.006131101670360799, 'time_step': 0.003659729497978486, 'init_value': -0.2174728512763977, 'ave_value': -0.09361453172121499, 'soft_opc': nan} step=332




2022-04-20 17:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.47 [info     ] FQE_20220420174345: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015288806823362787, 'time_algorithm_update': 0.0035134482096476727, 'loss': 0.005913785270258156, 'time_step': 0.0037373160741415367, 'init_value': -0.27371838688850403, 'ave_value': -0.12793929768675888, 'soft_opc': nan} step=498




2022-04-20 17:43.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.48 [info     ] FQE_20220420174345: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015452970941382717, 'time_algorithm_update': 0.0034154481198414265, 'loss': 0.0059359446588724135, 'time_step': 0.003637767699827631, 'init_value': -0.351324200630188, 'ave_value': -0.1695814919905519, 'soft_opc': nan} step=664




2022-04-20 17:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.48 [info     ] FQE_20220420174345: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015714513250144132, 'time_algorithm_update': 0.003530219376805317, 'loss': 0.005494809633749257, 'time_step': 0.0037562760962061136, 'init_value': -0.4221716523170471, 'ave_value': -0.20771250996596038, 'soft_opc': nan} step=830




2022-04-20 17:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.49 [info     ] FQE_20220420174345: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014936636729412768, 'time_algorithm_update': 0.0033529092030352855, 'loss': 0.005361954409472016, 'time_step': 0.0035728848124124916, 'init_value': -0.4419287443161011, 'ave_value': -0.19833157703506987, 'soft_opc': nan} step=996




2022-04-20 17:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.50 [info     ] FQE_20220420174345: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015276167766157403, 'time_algorithm_update': 0.0035519700452505826, 'loss': 0.005267753786160554, 'time_step': 0.0037754558655152836, 'init_value': -0.4634779691696167, 'ave_value': -0.18085597971252895, 'soft_opc': nan} step=1162




2022-04-20 17:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.50 [info     ] FQE_20220420174345: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015516022601759578, 'time_algorithm_update': 0.003379706876823701, 'loss': 0.0050507366477725016, 'time_step': 0.003604567194559488, 'init_value': -0.5325148105621338, 'ave_value': -0.20626257463433856, 'soft_opc': nan} step=1328




2022-04-20 17:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.51 [info     ] FQE_20220420174345: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015127227967043957, 'time_algorithm_update': 0.003515356994536986, 'loss': 0.00483800605478057, 'time_step': 0.003739283745547375, 'init_value': -0.5624666213989258, 'ave_value': -0.19288499893469585, 'soft_opc': nan} step=1494




2022-04-20 17:43.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.52 [info     ] FQE_20220420174345: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001517017203641225, 'time_algorithm_update': 0.003351329320884613, 'loss': 0.004861284677831299, 'time_step': 0.003568858985441277, 'init_value': -0.6368504762649536, 'ave_value': -0.2190129582061189, 'soft_opc': nan} step=1660




2022-04-20 17:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.52 [info     ] FQE_20220420174345: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015719971024846457, 'time_algorithm_update': 0.0035094209464199572, 'loss': 0.004736133097917829, 'time_step': 0.003740412643156856, 'init_value': -0.7189087867736816, 'ave_value': -0.24507042733440595, 'soft_opc': nan} step=1826




2022-04-20 17:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.53 [info     ] FQE_20220420174345: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015946899551943125, 'time_algorithm_update': 0.0034842735313507446, 'loss': 0.0047414041725997195, 'time_step': 0.003716118364449007, 'init_value': -0.7141082286834717, 'ave_value': -0.21247704310325888, 'soft_opc': nan} step=1992




2022-04-20 17:43.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.54 [info     ] FQE_20220420174345: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015419937041868647, 'time_algorithm_update': 0.003457629537007895, 'loss': 0.004863661605898425, 'time_step': 0.0036844331097890095, 'init_value': -0.8458200693130493, 'ave_value': -0.29669627394812464, 'soft_opc': nan} step=2158




2022-04-20 17:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.54 [info     ] FQE_20220420174345: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015338070421333773, 'time_algorithm_update': 0.0034293295389198394, 'loss': 0.005054230456021671, 'time_step': 0.003655689308442265, 'init_value': -0.877375602722168, 'ave_value': -0.2946984788050523, 'soft_opc': nan} step=2324




2022-04-20 17:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.55 [info     ] FQE_20220420174345: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015497782144201807, 'time_algorithm_update': 0.003485574779740299, 'loss': 0.005363064334189227, 'time_step': 0.0037126426237175264, 'init_value': -0.9181802272796631, 'ave_value': -0.29313605786049485, 'soft_opc': nan} step=2490




2022-04-20 17:43.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.56 [info     ] FQE_20220420174345: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015315664819924227, 'time_algorithm_update': 0.0033401451915143483, 'loss': 0.005870221166574138, 'time_step': 0.003561898886439312, 'init_value': -1.0266050100326538, 'ave_value': -0.35378136486696027, 'soft_opc': nan} step=2656




2022-04-20 17:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.56 [info     ] FQE_20220420174345: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001555753041462726, 'time_algorithm_update': 0.003495281001171434, 'loss': 0.005913996759729168, 'time_step': 0.0037206526262214385, 'init_value': -1.0965938568115234, 'ave_value': -0.3712945948673731, 'soft_opc': nan} step=2822




2022-04-20 17:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.57 [info     ] FQE_20220420174345: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015798821506730044, 'time_algorithm_update': 0.003462423761206937, 'loss': 0.006048006776310728, 'time_step': 0.003691417625151485, 'init_value': -1.1379454135894775, 'ave_value': -0.37911843111579147, 'soft_opc': nan} step=2988




2022-04-20 17:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.58 [info     ] FQE_20220420174345: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015421086047069137, 'time_algorithm_update': 0.0035209598311458728, 'loss': 0.007048741947292878, 'time_step': 0.0037481842270816663, 'init_value': -1.2373418807983398, 'ave_value': -0.43021244922954893, 'soft_opc': nan} step=3154




2022-04-20 17:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.58 [info     ] FQE_20220420174345: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001607027398534568, 'time_algorithm_update': 0.0034948371979127446, 'loss': 0.006713752558381936, 'time_step': 0.0037277865122599774, 'init_value': -1.3413019180297852, 'ave_value': -0.4756559491333728, 'soft_opc': nan} step=3320




2022-04-20 17:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:43.59 [info     ] FQE_20220420174345: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015544747731771814, 'time_algorithm_update': 0.003500964268144355, 'loss': 0.0068364204756118895, 'time_step': 0.0037304651306336186, 'init_value': -1.4364246129989624, 'ave_value': -0.52645577880814, 'soft_opc': nan} step=3486




2022-04-20 17:43.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.00 [info     ] FQE_20220420174345: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015252900410847492, 'time_algorithm_update': 0.0034382099128631226, 'loss': 0.009396030061249902, 'time_step': 0.003661465932087726, 'init_value': -1.5400004386901855, 'ave_value': -0.5936219788194206, 'soft_opc': nan} step=3652




2022-04-20 17:44.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.01 [info     ] FQE_20220420174345: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015866756439208984, 'time_algorithm_update': 0.0035160507064267814, 'loss': 0.009720823549418655, 'time_step': 0.0037481799183121645, 'init_value': -1.5996357202529907, 'ave_value': -0.60433792971234, 'soft_opc': nan} step=3818




2022-04-20 17:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.01 [info     ] FQE_20220420174345: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015380727239401945, 'time_algorithm_update': 0.0035322014107761614, 'loss': 0.009999945667903331, 'time_step': 0.0037557690976613976, 'init_value': -1.6468358039855957, 'ave_value': -0.6106316874995157, 'soft_opc': nan} step=3984




2022-04-20 17:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.02 [info     ] FQE_20220420174345: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015440331884177336, 'time_algorithm_update': 0.003508312156401485, 'loss': 0.011367883493265429, 'time_step': 0.0037311789501144224, 'init_value': -1.7868764400482178, 'ave_value': -0.6892193680729818, 'soft_opc': nan} step=4150




2022-04-20 17:44.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.03 [info     ] FQE_20220420174345: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015465609998588102, 'time_algorithm_update': 0.0035573373357933686, 'loss': 0.011303828490606273, 'time_step': 0.0037845847118331724, 'init_value': -1.8610026836395264, 'ave_value': -0.7301045243695445, 'soft_opc': nan} step=4316




2022-04-20 17:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.03 [info     ] FQE_20220420174345: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015946468674992942, 'time_algorithm_update': 0.0034750398383083113, 'loss': 0.01247727475339457, 'time_step': 0.003707174795219697, 'init_value': -1.9454303979873657, 'ave_value': -0.7995426595974546, 'soft_opc': nan} step=4482




2022-04-20 17:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.04 [info     ] FQE_20220420174345: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015370817069547722, 'time_algorithm_update': 0.0035277418343417615, 'loss': 0.013267578705808664, 'time_step': 0.0037530689354402474, 'init_value': -2.006171464920044, 'ave_value': -0.8229537717427488, 'soft_opc': nan} step=4648




2022-04-20 17:44.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.05 [info     ] FQE_20220420174345: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001547652554799275, 'time_algorithm_update': 0.00337205162967544, 'loss': 0.014274043301737541, 'time_step': 0.0035953105214130447, 'init_value': -2.096853256225586, 'ave_value': -0.8561189394335154, 'soft_opc': nan} step=4814




2022-04-20 17:44.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.05 [info     ] FQE_20220420174345: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015173044549413473, 'time_algorithm_update': 0.0034361259046807348, 'loss': 0.014876538548192167, 'time_step': 0.003662151026438518, 'init_value': -2.2313151359558105, 'ave_value': -0.9338631061694442, 'soft_opc': nan} step=4980




2022-04-20 17:44.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.06 [info     ] FQE_20220420174345: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001573404633855245, 'time_algorithm_update': 0.0034101253532501587, 'loss': 0.01659102092429726, 'time_step': 0.003640009696225086, 'init_value': -2.3605430126190186, 'ave_value': -1.0469161925157784, 'soft_opc': nan} step=5146




2022-04-20 17:44.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.07 [info     ] FQE_20220420174345: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015524640140763247, 'time_algorithm_update': 0.0034666549728577397, 'loss': 0.016597253013772507, 'time_step': 0.0036906047039721385, 'init_value': -2.456416606903076, 'ave_value': -1.079574038935322, 'soft_opc': nan} step=5312




2022-04-20 17:44.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.07 [info     ] FQE_20220420174345: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015365072043545274, 'time_algorithm_update': 0.003474729606904179, 'loss': 0.01756245740450896, 'time_step': 0.0037013306675187075, 'init_value': -2.5129857063293457, 'ave_value': -1.1275838715808786, 'soft_opc': nan} step=5478




2022-04-20 17:44.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.08 [info     ] FQE_20220420174345: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001568880425878318, 'time_algorithm_update': 0.003495227859680911, 'loss': 0.019504395722417182, 'time_step': 0.0037224838532597185, 'init_value': -2.632577896118164, 'ave_value': -1.2455895855582344, 'soft_opc': nan} step=5644




2022-04-20 17:44.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.09 [info     ] FQE_20220420174345: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015869341700910087, 'time_algorithm_update': 0.0033912931580141366, 'loss': 0.019628575970755374, 'time_step': 0.003625368497457849, 'init_value': -2.758789300918579, 'ave_value': -1.2799786028141777, 'soft_opc': nan} step=5810




2022-04-20 17:44.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.09 [info     ] FQE_20220420174345: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015798677881079983, 'time_algorithm_update': 0.003566138715629118, 'loss': 0.021101647285262055, 'time_step': 0.0037948237844260342, 'init_value': -2.792041301727295, 'ave_value': -1.294731593344713, 'soft_opc': nan} step=5976




2022-04-20 17:44.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.10 [info     ] FQE_20220420174345: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015763776848115116, 'time_algorithm_update': 0.0034212606498994023, 'loss': 0.022521768512866313, 'time_step': 0.003655364714473127, 'init_value': -2.9121344089508057, 'ave_value': -1.3747763216986346, 'soft_opc': nan} step=6142




2022-04-20 17:44.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.11 [info     ] FQE_20220420174345: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001571322061929358, 'time_algorithm_update': 0.0035745839038527154, 'loss': 0.023190442878747337, 'time_step': 0.003801814044814512, 'init_value': -2.994621753692627, 'ave_value': -1.4308644930936967, 'soft_opc': nan} step=6308




2022-04-20 17:44.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.11 [info     ] FQE_20220420174345: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015615124300301793, 'time_algorithm_update': 0.003456061144909227, 'loss': 0.02454388194509883, 'time_step': 0.003682563103825213, 'init_value': -2.9870851039886475, 'ave_value': -1.4049040680423983, 'soft_opc': nan} step=6474




2022-04-20 17:44.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.12 [info     ] FQE_20220420174345: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015930382602186087, 'time_algorithm_update': 0.0036187286836555205, 'loss': 0.024616707184391254, 'time_step': 0.0038528945072587714, 'init_value': -3.078016519546509, 'ave_value': -1.48102099607019, 'soft_opc': nan} step=6640




2022-04-20 17:44.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.13 [info     ] FQE_20220420174345: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015582664903387967, 'time_algorithm_update': 0.0034505200673298664, 'loss': 0.02646871600215918, 'time_step': 0.0036775778575115895, 'init_value': -3.15488600730896, 'ave_value': -1.560797153578165, 'soft_opc': nan} step=6806




2022-04-20 17:44.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.14 [info     ] FQE_20220420174345: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001599142350346209, 'time_algorithm_update': 0.0035251824252576715, 'loss': 0.027026716221393233, 'time_step': 0.0037578459245612822, 'init_value': -3.2133984565734863, 'ave_value': -1.5995153716807295, 'soft_opc': nan} step=6972




2022-04-20 17:44.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.14 [info     ] FQE_20220420174345: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015557386788977198, 'time_algorithm_update': 0.0034967660903930664, 'loss': 0.027758811625753003, 'time_step': 0.0037215373602258153, 'init_value': -3.287616729736328, 'ave_value': -1.6648381298483426, 'soft_opc': nan} step=7138




2022-04-20 17:44.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.15 [info     ] FQE_20220420174345: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001569483653608575, 'time_algorithm_update': 0.003544701151100986, 'loss': 0.02860722004811852, 'time_step': 0.0037746386355664357, 'init_value': -3.39560866355896, 'ave_value': -1.7577823559485108, 'soft_opc': nan} step=7304




2022-04-20 17:44.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.16 [info     ] FQE_20220420174345: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015524352889463125, 'time_algorithm_update': 0.003471332860280232, 'loss': 0.029421446109976036, 'time_step': 0.0036949149097304746, 'init_value': -3.395672559738159, 'ave_value': -1.7182193334189217, 'soft_opc': nan} step=7470




2022-04-20 17:44.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.16 [info     ] FQE_20220420174345: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001608621643250247, 'time_algorithm_update': 0.0035488806575177663, 'loss': 0.030465637999923384, 'time_step': 0.0037829172180359623, 'init_value': -3.476449966430664, 'ave_value': -1.7974165907708575, 'soft_opc': nan} step=7636




2022-04-20 17:44.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.17 [info     ] FQE_20220420174345: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001613576728177358, 'time_algorithm_update': 0.0034719734306795052, 'loss': 0.03077089093577857, 'time_step': 0.003706463848251894, 'init_value': -3.467499017715454, 'ave_value': -1.7694991698840985, 'soft_opc': nan} step=7802




2022-04-20 17:44.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.18 [info     ] FQE_20220420174345: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015605357756097633, 'time_algorithm_update': 0.003443387617547828, 'loss': 0.03207989573529192, 'time_step': 0.003669109689183982, 'init_value': -3.5560479164123535, 'ave_value': -1.8397991428336313, 'soft_opc': nan} step=7968




2022-04-20 17:44.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.18 [info     ] FQE_20220420174345: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015514442719608904, 'time_algorithm_update': 0.003390747380543904, 'loss': 0.03329222970249423, 'time_step': 0.003615803029163774, 'init_value': -3.6541154384613037, 'ave_value': -1.9588211857172586, 'soft_opc': nan} step=8134




2022-04-20 17:44.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:44.19 [info     ] FQE_20220420174345: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015860006033656108, 'time_algorithm_update': 0.0035335773445037476, 'loss': 0.03429010998477307, 'time_step': 0.0037647902247417405, 'init_value': -3.6153945922851562, 'ave_value': -1.881963542947831, 'soft_opc': nan} step=8300




2022-04-20 17:44.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174345/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 17:44.19 [info     ] Directory is created at d3rlpy_logs/FQE_20220420174419
2022-04-20 17:44.19 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:44.19 [debug    ] Building models...
2022-04-20 17:44.19 [debug    ] Models have been built.
2022-04-20 17:44.19 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420174419/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:44.21 [info     ] FQE_20220420174419: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016247394473053688, 'time_algorithm_update': 0.0034992015639016797, 'loss': 0.02387320705916906, 'time_step': 0.0037351645702539487, 'init_value': -0.9530223608016968, 'ave_value': -0.9707523973004238, 'soft_opc': nan} step=344




2022-04-20 17:44.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.22 [info     ] FQE_20220420174419: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016054095223892566, 'time_algorithm_update': 0.0034979276878889217, 'loss': 0.02158178183044372, 'time_step': 0.0037307482819224514, 'init_value': -1.8007934093475342, 'ave_value': -1.808415586306705, 'soft_opc': nan} step=688




2022-04-20 17:44.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.24 [info     ] FQE_20220420174419: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015957618868628213, 'time_algorithm_update': 0.0035012953503187312, 'loss': 0.025254560994569127, 'time_step': 0.003734908131665962, 'init_value': -2.864675998687744, 'ave_value': -2.8701579394372736, 'soft_opc': nan} step=1032




2022-04-20 17:44.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.25 [info     ] FQE_20220420174419: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001621911692064862, 'time_algorithm_update': 0.0035182937633159547, 'loss': 0.028919966883285967, 'time_step': 0.0037542207296504533, 'init_value': -3.597062349319458, 'ave_value': -3.5632790817736506, 'soft_opc': nan} step=1376




2022-04-20 17:44.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.26 [info     ] FQE_20220420174419: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016220918921537177, 'time_algorithm_update': 0.0034563652304715887, 'loss': 0.03633200138168366, 'time_step': 0.0036920468474543372, 'init_value': -4.523554801940918, 'ave_value': -4.467640350316022, 'soft_opc': nan} step=1720




2022-04-20 17:44.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.28 [info     ] FQE_20220420174419: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001619749290998592, 'time_algorithm_update': 0.0034847987252612446, 'loss': 0.04403559675949171, 'time_step': 0.003719533598700235, 'init_value': -5.121972560882568, 'ave_value': -5.009538776930925, 'soft_opc': nan} step=2064




2022-04-20 17:44.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.29 [info     ] FQE_20220420174419: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016281979028568713, 'time_algorithm_update': 0.003460509832515273, 'loss': 0.055475679172072995, 'time_step': 0.003698222858961238, 'init_value': -6.060370445251465, 'ave_value': -5.924797067959029, 'soft_opc': nan} step=2408




2022-04-20 17:44.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.30 [info     ] FQE_20220420174419: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001619402752366177, 'time_algorithm_update': 0.003519275160722954, 'loss': 0.06998915566391377, 'time_step': 0.0037543558797170948, 'init_value': -6.611741065979004, 'ave_value': -6.496337490057078, 'soft_opc': nan} step=2752




2022-04-20 17:44.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.32 [info     ] FQE_20220420174419: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001589655876159668, 'time_algorithm_update': 0.0034211174000141234, 'loss': 0.08544754785442249, 'time_step': 0.0036540218563966974, 'init_value': -7.121566295623779, 'ave_value': -7.03113397817182, 'soft_opc': nan} step=3096




2022-04-20 17:44.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.33 [info     ] FQE_20220420174419: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016332920207533727, 'time_algorithm_update': 0.0034186167772426164, 'loss': 0.10703871124640627, 'time_step': 0.0036518636137940165, 'init_value': -7.702872276306152, 'ave_value': -7.719377730743171, 'soft_opc': nan} step=3440




2022-04-20 17:44.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.35 [info     ] FQE_20220420174419: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016147036885106288, 'time_algorithm_update': 0.003601062436436498, 'loss': 0.1281388215828947, 'time_step': 0.0038386784320653873, 'init_value': -7.991273880004883, 'ave_value': -8.121194227144693, 'soft_opc': nan} step=3784




2022-04-20 17:44.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.36 [info     ] FQE_20220420174419: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016503347906955454, 'time_algorithm_update': 0.004371201576188553, 'loss': 0.1460433633792279, 'time_step': 0.0046071735925452655, 'init_value': -8.361703872680664, 'ave_value': -8.701798968651618, 'soft_opc': nan} step=4128




2022-04-20 17:44.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.38 [info     ] FQE_20220420174419: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016303048577419546, 'time_algorithm_update': 0.0042393789734951285, 'loss': 0.16514951710827475, 'time_step': 0.004474429890166881, 'init_value': -8.640376091003418, 'ave_value': -9.152653781480055, 'soft_opc': nan} step=4472




2022-04-20 17:44.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.40 [info     ] FQE_20220420174419: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016442357107650403, 'time_algorithm_update': 0.004061956738316735, 'loss': 0.1890108386801859, 'time_step': 0.004300764826841132, 'init_value': -9.054232597351074, 'ave_value': -9.829496162210859, 'soft_opc': nan} step=4816




2022-04-20 17:44.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.41 [info     ] FQE_20220420174419: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016527744226677474, 'time_algorithm_update': 0.004458683174709941, 'loss': 0.21326670170285153, 'time_step': 0.004698591869930888, 'init_value': -9.358553886413574, 'ave_value': -10.238881468381901, 'soft_opc': nan} step=5160




2022-04-20 17:44.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.43 [info     ] FQE_20220420174419: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001657695271248041, 'time_algorithm_update': 0.004044046235638995, 'loss': 0.23129633193575713, 'time_step': 0.004282319961592208, 'init_value': -9.763899803161621, 'ave_value': -10.83046448472216, 'soft_opc': nan} step=5504




2022-04-20 17:44.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.45 [info     ] FQE_20220420174419: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016278305719065112, 'time_algorithm_update': 0.004513069640758426, 'loss': 0.2519744078054764, 'time_step': 0.0047477878803430604, 'init_value': -9.57227611541748, 'ave_value': -10.858973052368608, 'soft_opc': nan} step=5848




2022-04-20 17:44.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.46 [info     ] FQE_20220420174419: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001663835935814436, 'time_algorithm_update': 0.0040225982666015625, 'loss': 0.25693723874513147, 'time_step': 0.004265757494194563, 'init_value': -9.742937088012695, 'ave_value': -11.174951357470809, 'soft_opc': nan} step=6192




2022-04-20 17:44.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.48 [info     ] FQE_20220420174419: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016538209693376407, 'time_algorithm_update': 0.004371488510176193, 'loss': 0.27440023749287046, 'time_step': 0.004613303167875423, 'init_value': -9.72756290435791, 'ave_value': -11.465606480061185, 'soft_opc': nan} step=6536




2022-04-20 17:44.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.50 [info     ] FQE_20220420174419: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001652490260989167, 'time_algorithm_update': 0.0043368831623432245, 'loss': 0.28783606719442234, 'time_step': 0.004577626322591027, 'init_value': -9.876077651977539, 'ave_value': -11.818258478710646, 'soft_opc': nan} step=6880




2022-04-20 17:44.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.51 [info     ] FQE_20220420174419: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016381227692892385, 'time_algorithm_update': 0.004033141357954158, 'loss': 0.3008742603137656, 'time_step': 0.004270229228707247, 'init_value': -10.151239395141602, 'ave_value': -12.22515739316859, 'soft_opc': nan} step=7224




2022-04-20 17:44.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.53 [info     ] FQE_20220420174419: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001663926035858864, 'time_algorithm_update': 0.004495884789976963, 'loss': 0.30985492889141275, 'time_step': 0.004736368046250454, 'init_value': -10.324005126953125, 'ave_value': -12.577210097850577, 'soft_opc': nan} step=7568




2022-04-20 17:44.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.55 [info     ] FQE_20220420174419: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016308038733726325, 'time_algorithm_update': 0.003986205472502597, 'loss': 0.3187410819593297, 'time_step': 0.0042211940122205156, 'init_value': -10.309709548950195, 'ave_value': -12.758183216986138, 'soft_opc': nan} step=7912




2022-04-20 17:44.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.56 [info     ] FQE_20220420174419: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016620547272438226, 'time_algorithm_update': 0.004445809957592986, 'loss': 0.31987276268841397, 'time_step': 0.004685060229412345, 'init_value': -10.201125144958496, 'ave_value': -12.674056335839417, 'soft_opc': nan} step=8256




2022-04-20 17:44.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:44.58 [info     ] FQE_20220420174419: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016193265138670455, 'time_algorithm_update': 0.003987231226854546, 'loss': 0.32633939878148743, 'time_step': 0.0042230563108311145, 'init_value': -10.124458312988281, 'ave_value': -12.631497642319012, 'soft_opc': nan} step=8600




2022-04-20 17:44.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.00 [info     ] FQE_20220420174419: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016561289166295253, 'time_algorithm_update': 0.004339220218880232, 'loss': 0.3305309287239906, 'time_step': 0.004583075296047122, 'init_value': -10.226426124572754, 'ave_value': -12.932530175035334, 'soft_opc': nan} step=8944




2022-04-20 17:45.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.01 [info     ] FQE_20220420174419: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016340751980626307, 'time_algorithm_update': 0.004377997198770213, 'loss': 0.3302686584810185, 'time_step': 0.004614710807800293, 'init_value': -10.140615463256836, 'ave_value': -12.962452694835132, 'soft_opc': nan} step=9288




2022-04-20 17:45.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.03 [info     ] FQE_20220420174419: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001654770485190458, 'time_algorithm_update': 0.004125737866689992, 'loss': 0.33527184474325283, 'time_step': 0.004369548586911933, 'init_value': -10.462180137634277, 'ave_value': -13.390746670040965, 'soft_opc': nan} step=9632




2022-04-20 17:45.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.05 [info     ] FQE_20220420174419: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016365564146707224, 'time_algorithm_update': 0.004473833150641863, 'loss': 0.3480142025157896, 'time_step': 0.004715140475783237, 'init_value': -10.738438606262207, 'ave_value': -13.753945406115204, 'soft_opc': nan} step=9976




2022-04-20 17:45.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.06 [info     ] FQE_20220420174419: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001619361167730287, 'time_algorithm_update': 0.004052359004353368, 'loss': 0.3489000612415027, 'time_step': 0.004287832005079402, 'init_value': -10.955511093139648, 'ave_value': -13.85507049193052, 'soft_opc': nan} step=10320




2022-04-20 17:45.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.08 [info     ] FQE_20220420174419: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016571962556173635, 'time_algorithm_update': 0.0045341128526731975, 'loss': 0.3531560257272145, 'time_step': 0.004774580861246863, 'init_value': -11.107139587402344, 'ave_value': -14.058943679606593, 'soft_opc': nan} step=10664




2022-04-20 17:45.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.10 [info     ] FQE_20220420174419: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001668091430220493, 'time_algorithm_update': 0.004236253195030745, 'loss': 0.3569795933313841, 'time_step': 0.004479334797970084, 'init_value': -11.337678909301758, 'ave_value': -14.383324078133178, 'soft_opc': nan} step=11008




2022-04-20 17:45.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.11 [info     ] FQE_20220420174419: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016195136447285497, 'time_algorithm_update': 0.004130264354306598, 'loss': 0.3636570428335649, 'time_step': 0.0043630613837131235, 'init_value': -11.788969993591309, 'ave_value': -14.906746083348864, 'soft_opc': nan} step=11352




2022-04-20 17:45.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.13 [info     ] FQE_20220420174419: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016594972721365996, 'time_algorithm_update': 0.004531182521997496, 'loss': 0.36609878571335835, 'time_step': 0.004773575899212859, 'init_value': -11.941499710083008, 'ave_value': -15.092203383087726, 'soft_opc': nan} step=11696




2022-04-20 17:45.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.15 [info     ] FQE_20220420174419: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016332504361174827, 'time_algorithm_update': 0.004021086665086968, 'loss': 0.3673706569892982, 'time_step': 0.004259549601133479, 'init_value': -12.341108322143555, 'ave_value': -15.531498652885391, 'soft_opc': nan} step=12040




2022-04-20 17:45.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.17 [info     ] FQE_20220420174419: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016590190488238667, 'time_algorithm_update': 0.004581397355988968, 'loss': 0.36415431698307743, 'time_step': 0.00482375607934109, 'init_value': -12.275491714477539, 'ave_value': -15.654686872921632, 'soft_opc': nan} step=12384




2022-04-20 17:45.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.18 [info     ] FQE_20220420174419: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016313652659571447, 'time_algorithm_update': 0.004045681204906729, 'loss': 0.3644496901556416, 'time_step': 0.004285180291464162, 'init_value': -12.181299209594727, 'ave_value': -15.858696163227458, 'soft_opc': nan} step=12728




2022-04-20 17:45.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.20 [info     ] FQE_20220420174419: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001632883105167123, 'time_algorithm_update': 0.004387788994367732, 'loss': 0.3639304037801488, 'time_step': 0.0046259774718173715, 'init_value': -11.972997665405273, 'ave_value': -15.49388487713531, 'soft_opc': nan} step=13072




2022-04-20 17:45.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.22 [info     ] FQE_20220420174419: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016564200090807537, 'time_algorithm_update': 0.004296598739402239, 'loss': 0.351613309781262, 'time_step': 0.004537231700364934, 'init_value': -11.776504516601562, 'ave_value': -15.34525003131135, 'soft_opc': nan} step=13416




2022-04-20 17:45.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.23 [info     ] FQE_20220420174419: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001654534838920416, 'time_algorithm_update': 0.0041014967962752944, 'loss': 0.3522820886939244, 'time_step': 0.004343137491581051, 'init_value': -12.255058288574219, 'ave_value': -15.864254536796865, 'soft_opc': nan} step=13760




2022-04-20 17:45.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.25 [info     ] FQE_20220420174419: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016530724458916244, 'time_algorithm_update': 0.004524624624917674, 'loss': 0.37088745410640744, 'time_step': 0.004766947308252024, 'init_value': -12.438272476196289, 'ave_value': -16.03537012511646, 'soft_opc': nan} step=14104




2022-04-20 17:45.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.27 [info     ] FQE_20220420174419: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001665395359660304, 'time_algorithm_update': 0.004106768341951592, 'loss': 0.37371700777412326, 'time_step': 0.004347890615463257, 'init_value': -12.338582992553711, 'ave_value': -16.004633026724463, 'soft_opc': nan} step=14448




2022-04-20 17:45.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.28 [info     ] FQE_20220420174419: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001651734806770502, 'time_algorithm_update': 0.004542677208434704, 'loss': 0.3746033633820886, 'time_step': 0.00478226431580477, 'init_value': -12.210673332214355, 'ave_value': -15.825960689820013, 'soft_opc': nan} step=14792




2022-04-20 17:45.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.30 [info     ] FQE_20220420174419: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016262919403785882, 'time_algorithm_update': 0.004018826540126357, 'loss': 0.3769679256920638, 'time_step': 0.004254435383996298, 'init_value': -12.67323112487793, 'ave_value': -16.257736859815342, 'soft_opc': nan} step=15136




2022-04-20 17:45.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.32 [info     ] FQE_20220420174419: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016469802967337675, 'time_algorithm_update': 0.004271528748578803, 'loss': 0.3882482829828595, 'time_step': 0.0045120612133380976, 'init_value': -12.905206680297852, 'ave_value': -16.489407893882326, 'soft_opc': nan} step=15480




2022-04-20 17:45.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.33 [info     ] FQE_20220420174419: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016485674436702284, 'time_algorithm_update': 0.004470983910006146, 'loss': 0.3938180945033944, 'time_step': 0.00471166261406832, 'init_value': -13.119596481323242, 'ave_value': -16.565354805288163, 'soft_opc': nan} step=15824




2022-04-20 17:45.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.35 [info     ] FQE_20220420174419: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001658727956372638, 'time_algorithm_update': 0.0041115180004474726, 'loss': 0.4077206354068462, 'time_step': 0.00435155006342156, 'init_value': -12.886165618896484, 'ave_value': -16.524611933301163, 'soft_opc': nan} step=16168




2022-04-20 17:45.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.37 [info     ] FQE_20220420174419: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016560665396756904, 'time_algorithm_update': 0.0045403949050016185, 'loss': 0.4227737068133645, 'time_step': 0.004780174687851307, 'init_value': -13.76002025604248, 'ave_value': -17.305196117655257, 'soft_opc': nan} step=16512




2022-04-20 17:45.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.38 [info     ] FQE_20220420174419: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016556784164073856, 'time_algorithm_update': 0.0040664485720701, 'loss': 0.4362302354082119, 'time_step': 0.004306728756704996, 'init_value': -13.868356704711914, 'ave_value': -17.328667371204492, 'soft_opc': nan} step=16856




2022-04-20 17:45.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 17:45.40 [info     ] FQE_20220420174419: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016682231149008108, 'time_algorithm_update': 0.004506802143052567, 'loss': 0.45826215208183196, 'time_step': 0.004747525897136954, 'init_value': -13.90713119506836, 'ave_value': -17.465961108438414, 'soft_opc': nan} step=17200




2022-04-20 17:45.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420174419/model_17200.pt
search iteration:  15
using hyper params:  [0.00886952813291376, 0.006557465642528463, 2.733885521476885e-05, 1]
2022-04-20 17:45.40 [debug    ] RoundIterator is selected.
2022-04-20 17:45.40 [info     ] Directory is created at d3rlpy_logs/CQL_20220420174540
2022-04-20 17:45.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:45.40 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:45.40 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420174540/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00886952813291376, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weig

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.49 [info     ] CQL_20220420174540: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003116507279245477, 'time_algorithm_update': 0.02532890177609628, 'temp_loss': 4.609441877108568, 'temp': 0.9955403511984307, 'alpha_loss': -11.872792678966857, 'alpha': 1.015416493541316, 'critic_loss': 17.8517171960128, 'actor_loss': -0.7926194246379081, 'time_step': 0.02574142587115193, 'td_error': 9.152721120935688, 'init_value': -3.0237021446228027, 'ave_value': -0.5309596213550718} step=342
2022-04-20 17:45.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:45.59 [info     ] CQL_20220420174540: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003135678363822357, 'time_algorithm_update': 0.02542142212739465, 'temp_loss': 3.3802389512982285, 'temp': 0.9873272360068316, 'alpha_loss': -1.9931437673635388, 'alpha': 1.0329792015036645, 'critic_loss': 29.850655851308364, 'actor_loss': 1.8909383559958977, 'time_step': 0.025834748619481138, 'td_error': 7.1260105026913045, 'init_value': -4.353180885314941, 'ave_value': -0.46799268165284447} step=684
2022-04-20 17:45.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.08 [info     ] CQL_20220420174540: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003091989204897518, 'time_algorithm_update': 0.025427538749070194, 'temp_loss': 2.454302108427237, 'temp': 0.9806356356855024, 'alpha_loss': 3.3210557298544163, 'alpha': 1.028739573314176, 'critic_loss': 66.64481760883889, 'actor_loss': 4.321130791602776, 'time_step': 0.025833947616710998, 'td_error': 10.969519693521539, 'init_value': -6.630247592926025, 'ave_value': -1.482512552679122} step=1026
2022-04-20 17:46.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.17 [info     ] CQL_20220420174540: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003057648563942714, 'time_algorithm_update': 0.02551880356861137, 'temp_loss': 1.8395941738496746, 'temp': 0.9748134093674999, 'alpha_loss': 6.781502793406882, 'alpha': 1.0015446362439653, 'critic_loss': 115.20119056924742, 'actor_loss': 6.73576089373806, 'time_step': 0.025924230876721834, 'td_error': 16.152658474222413, 'init_value': -12.258920669555664, 'ave_value': -3.8768787988832405} step=1368
2022-04-20 17:46.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.26 [info     ] CQL_20220420174540: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0002887472074631362, 'time_algorithm_update': 0.024020942331057542, 'temp_loss': 1.3879943096149734, 'temp': 0.9697050947194908, 'alpha_loss': 8.987240531988311, 'alpha': 0.9627013223910192, 'critic_loss': 172.83825299893206, 'actor_loss': 9.454155237354033, 'time_step': 0.02439977132786087, 'td_error': 19.34802813050046, 'init_value': -17.024776458740234, 'ave_value': -5.331033180133179} step=1710
2022-04-20 17:46.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.35 [info     ] CQL_20220420174540: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003106189750091374, 'time_algorithm_update': 0.02544772276404308, 'temp_loss': 1.0118502835955536, 'temp': 0.9652845800271508, 'alpha_loss': 10.965810948645162, 'alpha': 0.921471860847975, 'critic_loss': 239.33420518127798, 'actor_loss': 12.602126776823523, 'time_step': 0.02585713765774554, 'td_error': 33.72972959749027, 'init_value': -26.2850284576416, 'ave_value': -9.473610075736367} step=2052
2022-04-20 17:46.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.44 [info     ] CQL_20220420174540: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003096346269574082, 'time_algorithm_update': 0.025186231958935833, 'temp_loss': 0.6369310664285345, 'temp': 0.9617282694194749, 'alpha_loss': 12.699468061937923, 'alpha': 0.8806003895070817, 'critic_loss': 319.9708533036081, 'actor_loss': 16.47172817152146, 'time_step': 0.025590801099587602, 'td_error': 41.36743022094074, 'init_value': -31.66046714782715, 'ave_value': -10.767417009462108} step=2394
2022-04-20 17:46.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:46.53 [info     ] CQL_20220420174540: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00029967193715056483, 'time_algorithm_update': 0.024689523100155836, 'temp_loss': 0.4246492328841174, 'temp': 0.9590567047135872, 'alpha_loss': 13.30990445265296, 'alpha': 0.8435000813843911, 'critic_loss': 411.3277078371996, 'actor_loss': 20.507706288008663, 'time_step': 0.02508175233651323, 'td_error': 49.75612195364705, 'init_value': -39.03484344482422, 'ave_value': -16.859845690367457} step=2736
2022-04-20 17:46.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.02 [info     ] CQL_20220420174540: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003131488610429373, 'time_algorithm_update': 0.025472881501181086, 'temp_loss': 0.11945244233663145, 'temp': 0.9575351250101949, 'alpha_loss': 14.900479238632826, 'alpha': 0.8087518351119861, 'critic_loss': 512.2983202125594, 'actor_loss': 25.24462321766636, 'time_step': 0.02588043157120197, 'td_error': 105.58146107188543, 'init_value': -50.72693634033203, 'ave_value': -21.442651399696196} step=3078
2022-04-20 17:47.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.12 [info     ] CQL_20220420174540: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00030970712851362617, 'time_algorithm_update': 0.025345112148084138, 'temp_loss': 0.030661494302296498, 'temp': 0.9570822253910422, 'alpha_loss': 14.847672551696064, 'alpha': 0.7767008434959323, 'critic_loss': 649.1890831662897, 'actor_loss': 31.080681873343842, 'time_step': 0.025752814889651292, 'td_error': 72.34987985116723, 'init_value': -57.52531814575195, 'ave_value': -24.89918557324388} step=3420
2022-04-20 17:47.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.21 [info     ] CQL_20220420174540: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.000313766518531487, 'time_algorithm_update': 0.02501759821908516, 'temp_loss': -0.10738305794347448, 'temp': 0.9572656945875514, 'alpha_loss': 15.17674415989926, 'alpha': 0.7471850005506772, 'critic_loss': 780.5273257249977, 'actor_loss': 36.63861575879549, 'time_step': 0.02543120635183234, 'td_error': 82.05132067088906, 'init_value': -68.4181137084961, 'ave_value': -29.198030884942494} step=3762
2022-04-20 17:47.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.29 [info     ] CQL_20220420174540: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00030519739229079577, 'time_algorithm_update': 0.023739546363116704, 'temp_loss': -0.2491842171950647, 'temp': 0.959028616983291, 'alpha_loss': 15.818705965900978, 'alpha': 0.7196907078662114, 'critic_loss': 923.5914649294134, 'actor_loss': 42.92190903111508, 'time_step': 0.024141911874737656, 'td_error': 79.5691044419426, 'init_value': -76.4205322265625, 'ave_value': -33.44169336167542} step=4104
2022-04-20 17:47.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.38 [info     ] CQL_20220420174540: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003049052946748789, 'time_algorithm_update': 0.023595867798342343, 'temp_loss': -0.3704380310679737, 'temp': 0.9623949785678707, 'alpha_loss': 16.769464180483457, 'alpha': 0.6932464787486003, 'critic_loss': 1062.3796549122235, 'actor_loss': 48.69395200272053, 'time_step': 0.02399996707313939, 'td_error': 161.15158921813347, 'init_value': -91.84860229492188, 'ave_value': -40.05492978805894} step=4446
2022-04-20 17:47.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.46 [info     ] CQL_20220420174540: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00031115576537729005, 'time_algorithm_update': 0.02326128287622106, 'temp_loss': -0.7059093327615519, 'temp': 0.9686120678806862, 'alpha_loss': 18.917950025078845, 'alpha': 0.6664213337744885, 'critic_loss': 1257.093041314019, 'actor_loss': 58.489907247978344, 'time_step': 0.023671599159463805, 'td_error': 169.35776596278163, 'init_value': -108.93186950683594, 'ave_value': -49.95361037161436} step=4788
2022-04-20 17:47.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:47.55 [info     ] CQL_20220420174540: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00031112230312057404, 'time_algorithm_update': 0.023678341107061733, 'temp_loss': -0.770927783281526, 'temp': 0.9772197891745651, 'alpha_loss': 20.87061299775776, 'alpha': 0.6402701268419188, 'critic_loss': 1475.5126913862618, 'actor_loss': 68.13316799186127, 'time_step': 0.024086166543570178, 'td_error': 292.9370074941156, 'init_value': -128.7718505859375, 'ave_value': -59.582554630733824} step=5130
2022-04-20 17:47.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.04 [info     ] CQL_20220420174540: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003066069898549576, 'time_algorithm_update': 0.023893189011958606, 'temp_loss': -0.7376283764054901, 'temp': 0.9862157162169964, 'alpha_loss': 19.097511941229392, 'alpha': 0.6157200756477333, 'critic_loss': 1691.6375271981224, 'actor_loss': 75.96723665828593, 'time_step': 0.024295910757187515, 'td_error': 326.68546616334663, 'init_value': -139.6233673095703, 'ave_value': -66.29348549201681} step=5472
2022-04-20 17:48.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.12 [info     ] CQL_20220420174540: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003156069426508675, 'time_algorithm_update': 0.023582533089041013, 'temp_loss': -1.0047815135238987, 'temp': 0.9953260827831357, 'alpha_loss': 24.588999134755273, 'alpha': 0.5928355392656828, 'critic_loss': 1929.092244109215, 'actor_loss': 88.18259168926038, 'time_step': 0.023993905524761355, 'td_error': 541.2544914493942, 'init_value': -166.25314331054688, 'ave_value': -81.14446865136021} step=5814
2022-04-20 17:48.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.21 [info     ] CQL_20220420174540: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00030710613518430475, 'time_algorithm_update': 0.02402276044700578, 'temp_loss': -1.0507218739921935, 'temp': 1.006503262715033, 'alpha_loss': 23.007142828221905, 'alpha': 0.5686449534014651, 'critic_loss': 2236.7206399482593, 'actor_loss': 100.4467073853253, 'time_step': 0.02442942864713613, 'td_error': 689.7548032928887, 'init_value': -181.55596923828125, 'ave_value': -87.03922783412375} step=6156
2022-04-20 17:48.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.30 [info     ] CQL_20220420174540: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00031009473298725327, 'time_algorithm_update': 0.02377610875849138, 'temp_loss': -1.2157704326493002, 'temp': 1.0167484436815941, 'alpha_loss': 25.928449073033025, 'alpha': 0.5476809638982628, 'critic_loss': 2531.9380339683844, 'actor_loss': 113.40434771532203, 'time_step': 0.02418467454742967, 'td_error': 1190.38542379753, 'init_value': -213.26882934570312, 'ave_value': -104.42746577508815} step=6498
2022-04-20 17:48.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.38 [info     ] CQL_20220420174540: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00030449189637836655, 'time_algorithm_update': 0.023844855570653727, 'temp_loss': -1.2629764589824175, 'temp': 1.0277427567376032, 'alpha_loss': 30.01776699713099, 'alpha': 0.525521111941477, 'critic_loss': 2923.3580486453766, 'actor_loss': 132.2199216251485, 'time_step': 0.024249541829204, 'td_error': 1113.7906489188915, 'init_value': -260.26568603515625, 'ave_value': -123.93933905041433} step=6840
2022-04-20 17:48.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.47 [info     ] CQL_20220420174540: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00030684959121614867, 'time_algorithm_update': 0.023754184009038913, 'temp_loss': -1.1104725526961667, 'temp': 1.037476256576895, 'alpha_loss': 28.927303952780385, 'alpha': 0.5065909035024587, 'critic_loss': 3351.49220677426, 'actor_loss': 148.5147017094127, 'time_step': 0.024155326056898685, 'td_error': 2604.604861216162, 'init_value': -277.27020263671875, 'ave_value': -138.71057587609636} step=7182
2022-04-20 17:48.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:48.56 [info     ] CQL_20220420174540: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00030585896899128516, 'time_algorithm_update': 0.02384101717095626, 'temp_loss': -1.329824852316003, 'temp': 1.047511907697421, 'alpha_loss': 32.36158012367829, 'alpha': 0.48656993729677817, 'critic_loss': 3789.9800382915296, 'actor_loss': 168.74757871572038, 'time_step': 0.024242077654565288, 'td_error': 4364.029076115494, 'init_value': -327.1487731933594, 'ave_value': -168.20240543541607} step=7524
2022-04-20 17:48.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.04 [info     ] CQL_20220420174540: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00030531660158034655, 'time_algorithm_update': 0.023800969820970682, 'temp_loss': -1.3988918994079556, 'temp': 1.0583389519948012, 'alpha_loss': 29.17135937450922, 'alpha': 0.469856756123883, 'critic_loss': 4235.6514685558295, 'actor_loss': 187.05730340076468, 'time_step': 0.024201886695727967, 'td_error': 4166.498299136846, 'init_value': -340.8607482910156, 'ave_value': -177.5923709320807} step=7866
2022-04-20 17:49.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.13 [info     ] CQL_20220420174540: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00030789249821713095, 'time_algorithm_update': 0.023862879875807735, 'temp_loss': -1.6810014487532845, 'temp': 1.0693920669499895, 'alpha_loss': 36.38043294454876, 'alpha': 0.4526391479878398, 'critic_loss': 4709.0382094355355, 'actor_loss': 210.16790503786322, 'time_step': 0.024267027252598813, 'td_error': 2784.042957133077, 'init_value': -374.6932373046875, 'ave_value': -202.5942093689216} step=8208
2022-04-20 17:49.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.22 [info     ] CQL_20220420174540: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00030122932634855574, 'time_algorithm_update': 0.02400069208870157, 'temp_loss': -1.7109837498494058, 'temp': 1.0816864733807525, 'alpha_loss': 34.30542926621019, 'alpha': 0.4353792399865145, 'critic_loss': 5301.544023837262, 'actor_loss': 236.876572770682, 'time_step': 0.024394774994654964, 'td_error': 3888.555657635514, 'init_value': -421.3374938964844, 'ave_value': -221.49148433921573} step=8550
2022-04-20 17:49.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.30 [info     ] CQL_20220420174540: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003074135696678831, 'time_algorithm_update': 0.023654173689278944, 'temp_loss': -1.5723204784400282, 'temp': 1.0918407917719835, 'alpha_loss': 43.43691769259715, 'alpha': 0.42033591997205166, 'critic_loss': 5888.460112276133, 'actor_loss': 263.95030605026153, 'time_step': 0.0240599028548302, 'td_error': 9397.60868839081, 'init_value': -485.0357971191406, 'ave_value': -258.7835340343402} step=8892
2022-04-20 17:49.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.39 [info     ] CQL_20220420174540: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00031076188673052874, 'time_algorithm_update': 0.02385761096463566, 'temp_loss': -1.7950353842903997, 'temp': 1.1025433948165493, 'alpha_loss': 43.42353315241853, 'alpha': 0.4029712156768431, 'critic_loss': 6681.177198978892, 'actor_loss': 299.0792288975409, 'time_step': 0.024267139490584882, 'td_error': 14941.124138356794, 'init_value': -543.6277465820312, 'ave_value': -295.5301238709742} step=9234
2022-04-20 17:49.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.48 [info     ] CQL_20220420174540: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003038066172460366, 'time_algorithm_update': 0.023677184567813984, 'temp_loss': -1.8821678546785612, 'temp': 1.1137409938706293, 'alpha_loss': 41.47447856546145, 'alpha': 0.3881642306060122, 'critic_loss': 7491.470497532895, 'actor_loss': 337.8947445607325, 'time_step': 0.024079448298404093, 'td_error': 12367.435910161385, 'init_value': -599.5087890625, 'ave_value': -332.45234964522155} step=9576
2022-04-20 17:49.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:49.57 [info     ] CQL_20220420174540: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00030344829224703603, 'time_algorithm_update': 0.025383914423267744, 'temp_loss': -1.9265807788623006, 'temp': 1.124850376656181, 'alpha_loss': 44.972667164272735, 'alpha': 0.37518342379589525, 'critic_loss': 8243.235113132767, 'actor_loss': 369.2188545807063, 'time_step': 0.0257833296792549, 'td_error': 12241.444003742037, 'init_value': -653.0255737304688, 'ave_value': -363.42936356862384} step=9918
2022-04-20 17:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.06 [info     ] CQL_20220420174540: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00030918288649174207, 'time_algorithm_update': 0.025280933631093877, 'temp_loss': -2.201701754802152, 'temp': 1.1365381036585533, 'alpha_loss': 51.97852906009607, 'alpha': 0.3614274975326326, 'critic_loss': 9106.079863966557, 'actor_loss': 413.0495102196409, 'time_step': 0.025689092993039137, 'td_error': 28924.38418925093, 'init_value': -764.8125, 'ave_value': -423.7112804830611} step=10260
2022-04-20 17:50.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.15 [info     ] CQL_20220420174540: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00030927699908875583, 'time_algorithm_update': 0.025548144390708523, 'temp_loss': -2.2214445128426914, 'temp': 1.1487227127565975, 'alpha_loss': 48.24901082920052, 'alpha': 0.3479764641028399, 'critic_loss': 10164.520156592653, 'actor_loss': 461.87275213944287, 'time_step': 0.025953575881601076, 'td_error': 23152.850709940696, 'init_value': -817.2205810546875, 'ave_value': -466.2038471658595} step=10602
2022-04-20 17:50.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.25 [info     ] CQL_20220420174540: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003094840468021861, 'time_algorithm_update': 0.025391300519307453, 'temp_loss': -2.259531303630238, 'temp': 1.1601816830579301, 'alpha_loss': 51.22795049628319, 'alpha': 0.336856551511943, 'critic_loss': 11116.539079632676, 'actor_loss': 506.57042297006353, 'time_step': 0.025796725038896528, 'td_error': 46419.221889955545, 'init_value': -920.4646606445312, 'ave_value': -527.870280789051} step=10944
2022-04-20 17:50.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.34 [info     ] CQL_20220420174540: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003014426482351203, 'time_algorithm_update': 0.025176698004293164, 'temp_loss': -2.461861929001167, 'temp': 1.1717580135105645, 'alpha_loss': 62.451236082099335, 'alpha': 0.3240648072365432, 'critic_loss': 12294.416262621071, 'actor_loss': 568.6231666252627, 'time_step': 0.02557674625463653, 'td_error': 23797.134403784505, 'init_value': -962.10986328125, 'ave_value': -552.5386416990049} step=11286
2022-04-20 17:50.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.43 [info     ] CQL_20220420174540: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00030471358382911013, 'time_algorithm_update': 0.02538346477419312, 'temp_loss': -2.5245893060812477, 'temp': 1.184164207232626, 'alpha_loss': 48.65669815582142, 'alpha': 0.3128701346485238, 'critic_loss': 13533.266758612024, 'actor_loss': 619.7365545975534, 'time_step': 0.025784649347004137, 'td_error': 57889.73673117853, 'init_value': -1077.7373046875, 'ave_value': -633.7615046440374} step=11628
2022-04-20 17:50.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:50.52 [info     ] CQL_20220420174540: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003046410822728921, 'time_algorithm_update': 0.024923514204415662, 'temp_loss': -2.5274011196448787, 'temp': 1.1959258725768642, 'alpha_loss': 51.66836192036233, 'alpha': 0.3036352670506427, 'critic_loss': 14611.087756419043, 'actor_loss': 671.7215486939191, 'time_step': 0.025325824642739102, 'td_error': 41626.446922843126, 'init_value': -1191.057373046875, 'ave_value': -711.7183928176519} step=11970
2022-04-20 17:50.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.01 [info     ] CQL_20220420174540: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003059649328042192, 'time_algorithm_update': 0.025318117866739195, 'temp_loss': -2.4338385809583274, 'temp': 1.2073571831859342, 'alpha_loss': 50.237218542405735, 'alpha': 0.29400176698701425, 'critic_loss': 15805.14977099324, 'actor_loss': 728.4419599388078, 'time_step': 0.025724966623629743, 'td_error': 38210.69216417706, 'init_value': -1224.52294921875, 'ave_value': -729.9315021029679} step=12312
2022-04-20 17:51.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.11 [info     ] CQL_20220420174540: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003083086850350363, 'time_algorithm_update': 0.024982232099388078, 'temp_loss': -2.812086073278684, 'temp': 1.2193313247976247, 'alpha_loss': 71.347034819642, 'alpha': 0.2838332395978838, 'critic_loss': 17075.469038400035, 'actor_loss': 801.8245583695975, 'time_step': 0.025388962344119425, 'td_error': 64624.573241981096, 'init_value': -1386.028564453125, 'ave_value': -828.8760551267379} step=12654
2022-04-20 17:51.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.20 [info     ] CQL_20220420174540: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00030740520410370406, 'time_algorithm_update': 0.025344913465934888, 'temp_loss': -2.8600889378123813, 'temp': 1.2320229275184764, 'alpha_loss': 45.97069371461171, 'alpha': 0.274290775607901, 'critic_loss': 18492.176672149122, 'actor_loss': 860.5922323305007, 'time_step': 0.025748675329643384, 'td_error': 47937.17928481502, 'init_value': -1438.053466796875, 'ave_value': -871.0705946386182} step=12996
2022-04-20 17:51.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.29 [info     ] CQL_20220420174540: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00031048303459122866, 'time_algorithm_update': 0.025187353641666167, 'temp_loss': -2.806389121632827, 'temp': 1.2438461320442067, 'alpha_loss': 66.69226511458905, 'alpha': 0.26653656520341573, 'critic_loss': 19633.606405336257, 'actor_loss': 923.7343332391036, 'time_step': 0.02559529619607312, 'td_error': 120633.22356044056, 'init_value': -1609.345703125, 'ave_value': -992.0573349047996} step=13338
2022-04-20 17:51.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.38 [info     ] CQL_20220420174540: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00030793014325593645, 'time_algorithm_update': 0.02543085151248508, 'temp_loss': -2.8559507319801734, 'temp': 1.2560146234885992, 'alpha_loss': 81.58633214270162, 'alpha': 0.25575474941591075, 'critic_loss': 21286.49820106908, 'actor_loss': 1015.2346187836943, 'time_step': 0.02583638966432092, 'td_error': 106727.07479834295, 'init_value': -1746.295654296875, 'ave_value': -1049.8612548603776} step=13680
2022-04-20 17:51.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.48 [info     ] CQL_20220420174540: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00031310145617925635, 'time_algorithm_update': 0.025405866360803792, 'temp_loss': -2.8096900578827886, 'temp': 1.2679557057849147, 'alpha_loss': 53.62740498781204, 'alpha': 0.2471261503752212, 'critic_loss': 22882.595863029972, 'actor_loss': 1085.1234396772775, 'time_step': 0.02581638411471718, 'td_error': 63769.5593724936, 'init_value': -1822.365234375, 'ave_value': -1111.7523735156683} step=14022
2022-04-20 17:51.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:51.57 [info     ] CQL_20220420174540: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003096255642628809, 'time_algorithm_update': 0.02541809890702454, 'temp_loss': -2.834305935435825, 'temp': 1.279825194188726, 'alpha_loss': 34.64715767115877, 'alpha': 0.24147304713900325, 'critic_loss': 24144.008212262426, 'actor_loss': 1140.618365728367, 'time_step': 0.02582659219440661, 'td_error': 58436.27150308129, 'init_value': -1901.8511962890625, 'ave_value': -1160.6275092745257} step=14364
2022-04-20 17:51.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.06 [info     ] CQL_20220420174540: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003049603679723907, 'time_algorithm_update': 0.025310670423228838, 'temp_loss': -2.7322301380118432, 'temp': 1.2917366484452408, 'alpha_loss': 30.129263983832466, 'alpha': 0.23736868306384448, 'critic_loss': 25131.423091419958, 'actor_loss': 1180.0880981802243, 'time_step': 0.025715838398849757, 'td_error': 57536.07541277865, 'init_value': -1901.7398681640625, 'ave_value': -1201.5832300377965} step=14706
2022-04-20 17:52.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.15 [info     ] CQL_20220420174540: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000306245876334564, 'time_algorithm_update': 0.02512887137675146, 'temp_loss': -2.554453174273173, 'temp': 1.303032318053887, 'alpha_loss': 28.6891940922765, 'alpha': 0.23315140962252143, 'critic_loss': 26033.412337810674, 'actor_loss': 1223.245775724712, 'time_step': 0.02553278312348483, 'td_error': 67600.49448144964, 'init_value': -1992.0452880859375, 'ave_value': -1264.967444817193} step=15048
2022-04-20 17:52.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.24 [info     ] CQL_20220420174540: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00030619847147088304, 'time_algorithm_update': 0.025348603376868176, 'temp_loss': -2.5961537671368022, 'temp': 1.3141975347061603, 'alpha_loss': 28.73771338295518, 'alpha': 0.22848140783825813, 'critic_loss': 26939.684621710527, 'actor_loss': 1270.4309540687248, 'time_step': 0.025754076695581624, 'td_error': 61752.023220650786, 'init_value': -2048.23583984375, 'ave_value': -1320.6759663706212} step=15390
2022-04-20 17:52.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.33 [info     ] CQL_20220420174540: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003131105188737836, 'time_algorithm_update': 0.0249952314889919, 'temp_loss': -2.545139639001144, 'temp': 1.325835997598213, 'alpha_loss': 28.830666880052515, 'alpha': 0.22346964709417164, 'critic_loss': 27830.406381350513, 'actor_loss': 1315.0159014428568, 'time_step': 0.025407065425002782, 'td_error': 59119.961340868475, 'init_value': -2122.11572265625, 'ave_value': -1356.0859349936838} step=15732
2022-04-20 17:52.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.43 [info     ] CQL_20220420174540: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.000308987689994232, 'time_algorithm_update': 0.025213915702195194, 'temp_loss': -2.697280820350201, 'temp': 1.3378234512624685, 'alpha_loss': 25.712953012589125, 'alpha': 0.21834702855139448, 'critic_loss': 28678.868021290204, 'actor_loss': 1357.1641701146175, 'time_step': 0.025621170188948425, 'td_error': 44734.16436763732, 'init_value': -2166.641845703125, 'ave_value': -1400.8432043546813} step=16074
2022-04-20 17:52.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:52.51 [info     ] CQL_20220420174540: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003057704334370574, 'time_algorithm_update': 0.024212896475317883, 'temp_loss': -2.746860700392584, 'temp': 1.3501809445040964, 'alpha_loss': 24.021277334955002, 'alpha': 0.21369868582277968, 'critic_loss': 29497.704689784357, 'actor_loss': 1396.8101556789109, 'time_step': 0.02461745515901443, 'td_error': 62866.22840019751, 'init_value': -2231.72314453125, 'ave_value': -1485.9750986378901} step=16416
2022-04-20 17:52.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:53.00 [info     ] CQL_20220420174540: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00030488019798234194, 'time_algorithm_update': 0.023841206093280637, 'temp_loss': -2.8609865935225236, 'temp': 1.3630824918635407, 'alpha_loss': 27.349180837123715, 'alpha': 0.20827828944600812, 'critic_loss': 30361.82573213633, 'actor_loss': 1443.3406525550529, 'time_step': 0.024243805840698598, 'td_error': 50685.55914441376, 'init_value': -2299.521240234375, 'ave_value': -1521.072895656985} step=16758
2022-04-20 17:53.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:53.09 [info     ] CQL_20220420174540: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003087262661136382, 'time_algorithm_update': 0.023611771432976974, 'temp_loss': -2.799259724672775, 'temp': 1.3760820194991707, 'alpha_loss': 22.62905891980344, 'alpha': 0.20290168021854602, 'critic_loss': 31220.34558319627, 'actor_loss': 1486.6386222616275, 'time_step': 0.02401841453641479, 'td_error': 55720.19298738469, 'init_value': -2341.116455078125, 'ave_value': -1571.125188162778} step=17100
2022-04-20 17:53.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420174540/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:53.10 [info     ] FQE_20220420175309: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014785398919898342, 'time_algorithm_update': 0.0034143852900309734, 'loss': 0.0053561439650716735, 'time_step': 0.0036304844431130283, 'init_value': -0.19874338805675507, 'ave_value': -0.1279796455021914, 'soft_opc': nan} step=166




2022-04-20 17:53.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.10 [info     ] FQE_20220420175309: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014870568930384624, 'time_algorithm_update': 0.003315201724868223, 'loss': 0.0037877071902705692, 'time_step': 0.00353287932384445, 'init_value': -0.3496205806732178, 'ave_value': -0.23668191950466183, 'soft_opc': nan} step=332




2022-04-20 17:53.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.11 [info     ] FQE_20220420175309: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001524887889264578, 'time_algorithm_update': 0.0033172570079205983, 'loss': 0.0031816007980392373, 'time_step': 0.0035389230911990247, 'init_value': -0.41805946826934814, 'ave_value': -0.27710690018427264, 'soft_opc': nan} step=498




2022-04-20 17:53.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.12 [info     ] FQE_20220420175309: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001493204070861081, 'time_algorithm_update': 0.0033275046980524637, 'loss': 0.003242878538573228, 'time_step': 0.0035501057843127883, 'init_value': -0.5067409873008728, 'ave_value': -0.32163385485072393, 'soft_opc': nan} step=664




2022-04-20 17:53.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.12 [info     ] FQE_20220420175309: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015193008514771978, 'time_algorithm_update': 0.003402425582150379, 'loss': 0.003264311057969883, 'time_step': 0.0036229397877153144, 'init_value': -0.59300696849823, 'ave_value': -0.38676233737653976, 'soft_opc': nan} step=830




2022-04-20 17:53.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.13 [info     ] FQE_20220420175309: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015631497624408767, 'time_algorithm_update': 0.0034764861486044274, 'loss': 0.003221226595229952, 'time_step': 0.003704533519515072, 'init_value': -0.620814323425293, 'ave_value': -0.3874520433580016, 'soft_opc': nan} step=996




2022-04-20 17:53.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.14 [info     ] FQE_20220420175309: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001504363783870835, 'time_algorithm_update': 0.003420862806848733, 'loss': 0.003310339814276401, 'time_step': 0.0036392599703317665, 'init_value': -0.6979368329048157, 'ave_value': -0.43767427010377785, 'soft_opc': nan} step=1162




2022-04-20 17:53.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.14 [info     ] FQE_20220420175309: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014783100909497365, 'time_algorithm_update': 0.00332643899572901, 'loss': 0.0033330694591368446, 'time_step': 0.0035434817693319664, 'init_value': -0.7673922777175903, 'ave_value': -0.48020290569612034, 'soft_opc': nan} step=1328




2022-04-20 17:53.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.15 [info     ] FQE_20220420175309: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015241410358842597, 'time_algorithm_update': 0.0033455225358526392, 'loss': 0.0033256985220485873, 'time_step': 0.003567178565335561, 'init_value': -0.8010916709899902, 'ave_value': -0.509259224870989, 'soft_opc': nan} step=1494




2022-04-20 17:53.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.16 [info     ] FQE_20220420175309: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015503383544554193, 'time_algorithm_update': 0.0034358199820461043, 'loss': 0.003365732988213604, 'time_step': 0.003660530929105828, 'init_value': -0.8564380407333374, 'ave_value': -0.5530916072864521, 'soft_opc': nan} step=1660




2022-04-20 17:53.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.16 [info     ] FQE_20220420175309: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015603490622646837, 'time_algorithm_update': 0.003473327820559582, 'loss': 0.0033726658341082104, 'time_step': 0.003696424415312618, 'init_value': -0.9047866463661194, 'ave_value': -0.5901570535726376, 'soft_opc': nan} step=1826




2022-04-20 17:53.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.17 [info     ] FQE_20220420175309: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015564998948430442, 'time_algorithm_update': 0.003505791526242911, 'loss': 0.00330608617724761, 'time_step': 0.0037332701395793133, 'init_value': -0.9509410858154297, 'ave_value': -0.6183364069475247, 'soft_opc': nan} step=1992




2022-04-20 17:53.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.18 [info     ] FQE_20220420175309: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001495387180742011, 'time_algorithm_update': 0.0034150186791477434, 'loss': 0.0035540933358898185, 'time_step': 0.003632328596459814, 'init_value': -1.0368103981018066, 'ave_value': -0.690694505788453, 'soft_opc': nan} step=2158




2022-04-20 17:53.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.18 [info     ] FQE_20220420175309: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014967372618525862, 'time_algorithm_update': 0.003352245652532003, 'loss': 0.0035658023882578745, 'time_step': 0.003569831331092191, 'init_value': -1.07696533203125, 'ave_value': -0.7091040054285849, 'soft_opc': nan} step=2324




2022-04-20 17:53.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.19 [info     ] FQE_20220420175309: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015069777707019485, 'time_algorithm_update': 0.0034645566021103457, 'loss': 0.0034522822818225138, 'time_step': 0.0036850966602922923, 'init_value': -1.108424186706543, 'ave_value': -0.7269148049926436, 'soft_opc': nan} step=2490




2022-04-20 17:53.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.20 [info     ] FQE_20220420175309: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014935056847262094, 'time_algorithm_update': 0.003461062190044357, 'loss': 0.0036317610247790007, 'time_step': 0.0036789164485701597, 'init_value': -1.1608669757843018, 'ave_value': -0.7543260924652353, 'soft_opc': nan} step=2656




2022-04-20 17:53.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.20 [info     ] FQE_20220420175309: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015726146927799088, 'time_algorithm_update': 0.0034259284835263908, 'loss': 0.00357143641225096, 'time_step': 0.003655545682792204, 'init_value': -1.2188441753387451, 'ave_value': -0.8024722811740798, 'soft_opc': nan} step=2822




2022-04-20 17:53.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.21 [info     ] FQE_20220420175309: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015499792903302665, 'time_algorithm_update': 0.003467456403985081, 'loss': 0.003862761403574238, 'time_step': 0.003691894462309688, 'init_value': -1.2651270627975464, 'ave_value': -0.8348571996565337, 'soft_opc': nan} step=2988




2022-04-20 17:53.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.22 [info     ] FQE_20220420175309: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015459146844335348, 'time_algorithm_update': 0.0034753543784819454, 'loss': 0.0038535782403918543, 'time_step': 0.003698709499405091, 'init_value': -1.3468530178070068, 'ave_value': -0.896498564637459, 'soft_opc': nan} step=3154




2022-04-20 17:53.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.22 [info     ] FQE_20220420175309: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014857786247529178, 'time_algorithm_update': 0.003403909235115511, 'loss': 0.0040046131281821756, 'time_step': 0.003619996898145561, 'init_value': -1.4098567962646484, 'ave_value': -0.9512704062435005, 'soft_opc': nan} step=3320




2022-04-20 17:53.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.23 [info     ] FQE_20220420175309: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001532801662582949, 'time_algorithm_update': 0.0034233906182898097, 'loss': 0.004192195604807776, 'time_step': 0.003649652722370194, 'init_value': -1.4590239524841309, 'ave_value': -0.9834828499872406, 'soft_opc': nan} step=3486




2022-04-20 17:53.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.24 [info     ] FQE_20220420175309: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015338644923934018, 'time_algorithm_update': 0.0034237324473369553, 'loss': 0.004148852162859796, 'time_step': 0.0036506265042776085, 'init_value': -1.4480278491973877, 'ave_value': -0.9750105184947585, 'soft_opc': nan} step=3652




2022-04-20 17:53.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.24 [info     ] FQE_20220420175309: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001525764005729951, 'time_algorithm_update': 0.0034459211740149073, 'loss': 0.004346667961091229, 'time_step': 0.0036673574562532357, 'init_value': -1.5098105669021606, 'ave_value': -0.9993078133947141, 'soft_opc': nan} step=3818




2022-04-20 17:53.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.25 [info     ] FQE_20220420175309: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015679755842829324, 'time_algorithm_update': 0.0034177547477814087, 'loss': 0.004623262673484565, 'time_step': 0.0036432786160204783, 'init_value': -1.6143577098846436, 'ave_value': -1.084541447078054, 'soft_opc': nan} step=3984




2022-04-20 17:53.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.26 [info     ] FQE_20220420175309: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.000150597239115152, 'time_algorithm_update': 0.0034663073987845914, 'loss': 0.004559912290690601, 'time_step': 0.003683281232075519, 'init_value': -1.6487064361572266, 'ave_value': -1.090774048394985, 'soft_opc': nan} step=4150




2022-04-20 17:53.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.26 [info     ] FQE_20220420175309: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015178071447165617, 'time_algorithm_update': 0.003448197640568377, 'loss': 0.004735465327709779, 'time_step': 0.003671337323016431, 'init_value': -1.739264965057373, 'ave_value': -1.148079911162993, 'soft_opc': nan} step=4316




2022-04-20 17:53.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.27 [info     ] FQE_20220420175309: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015097641083131353, 'time_algorithm_update': 0.003543540655848492, 'loss': 0.004874277461238812, 'time_step': 0.003769934895526932, 'init_value': -1.8000216484069824, 'ave_value': -1.2075025938652657, 'soft_opc': nan} step=4482




2022-04-20 17:53.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.28 [info     ] FQE_20220420175309: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001594287803374141, 'time_algorithm_update': 0.0035771074065242907, 'loss': 0.005303766790928925, 'time_step': 0.0038044567567756377, 'init_value': -1.8596004247665405, 'ave_value': -1.2319303061242577, 'soft_opc': nan} step=4648




2022-04-20 17:53.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.28 [info     ] FQE_20220420175309: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015188986996570266, 'time_algorithm_update': 0.0035011208201029213, 'loss': 0.005441798016440168, 'time_step': 0.003721909350659474, 'init_value': -1.924544095993042, 'ave_value': -1.2914315910556833, 'soft_opc': nan} step=4814




2022-04-20 17:53.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.29 [info     ] FQE_20220420175309: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015466902629438653, 'time_algorithm_update': 0.003419100520122482, 'loss': 0.005825175026765497, 'time_step': 0.0036427428923457503, 'init_value': -1.971156120300293, 'ave_value': -1.3191460305707412, 'soft_opc': nan} step=4980




2022-04-20 17:53.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.30 [info     ] FQE_20220420175309: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014608452119022967, 'time_algorithm_update': 0.0023735057876770756, 'loss': 0.005781180816506182, 'time_step': 0.0025897528751786933, 'init_value': -2.0594942569732666, 'ave_value': -1.394684857083065, 'soft_opc': nan} step=5146




2022-04-20 17:53.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.30 [info     ] FQE_20220420175309: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001551027757575713, 'time_algorithm_update': 0.0035097239965415864, 'loss': 0.005964563370202612, 'time_step': 0.0037346877247454173, 'init_value': -2.162109375, 'ave_value': -1.4755850228237677, 'soft_opc': nan} step=5312




2022-04-20 17:53.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.31 [info     ] FQE_20220420175309: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015247299010495106, 'time_algorithm_update': 0.003376838672591979, 'loss': 0.006424420205343245, 'time_step': 0.0036021901900509753, 'init_value': -2.195336103439331, 'ave_value': -1.5112651681443592, 'soft_opc': nan} step=5478




2022-04-20 17:53.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.32 [info     ] FQE_20220420175309: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015314946691673923, 'time_algorithm_update': 0.0035002317773290426, 'loss': 0.006693900024927928, 'time_step': 0.003727405904287315, 'init_value': -2.2962188720703125, 'ave_value': -1.5877316509683927, 'soft_opc': nan} step=5644




2022-04-20 17:53.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.32 [info     ] FQE_20220420175309: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015139436147299158, 'time_algorithm_update': 0.003504201590296734, 'loss': 0.0068240552377628995, 'time_step': 0.0037272493523287484, 'init_value': -2.3703575134277344, 'ave_value': -1.630986452612791, 'soft_opc': nan} step=5810




2022-04-20 17:53.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.33 [info     ] FQE_20220420175309: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015121770192341632, 'time_algorithm_update': 0.003446077725973474, 'loss': 0.006999462741253202, 'time_step': 0.003670186881559441, 'init_value': -2.4219751358032227, 'ave_value': -1.6653318533220807, 'soft_opc': nan} step=5976




2022-04-20 17:53.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.34 [info     ] FQE_20220420175309: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015094050441879825, 'time_algorithm_update': 0.0035004400345216313, 'loss': 0.007643926604328318, 'time_step': 0.003719299672597862, 'init_value': -2.429624080657959, 'ave_value': -1.6738065095903638, 'soft_opc': nan} step=6142




2022-04-20 17:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.34 [info     ] FQE_20220420175309: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015084427523325724, 'time_algorithm_update': 0.0034064241202480822, 'loss': 0.0074610958098159, 'time_step': 0.0036258022469210336, 'init_value': -2.5080134868621826, 'ave_value': -1.726886908575758, 'soft_opc': nan} step=6308




2022-04-20 17:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.35 [info     ] FQE_20220420175309: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015405287225562405, 'time_algorithm_update': 0.0035148025995277495, 'loss': 0.007857223746860513, 'time_step': 0.0037380500012133494, 'init_value': -2.5641980171203613, 'ave_value': -1.779582396767161, 'soft_opc': nan} step=6474




2022-04-20 17:53.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.36 [info     ] FQE_20220420175309: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015116886920239552, 'time_algorithm_update': 0.0034540633121168756, 'loss': 0.008194618542112005, 'time_step': 0.0036757968994508305, 'init_value': -2.7014074325561523, 'ave_value': -1.8917116300077053, 'soft_opc': nan} step=6640




2022-04-20 17:53.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.36 [info     ] FQE_20220420175309: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015419506164918463, 'time_algorithm_update': 0.003577726433076054, 'loss': 0.008548103049201105, 'time_step': 0.0038044093603111177, 'init_value': -2.762454032897949, 'ave_value': -1.949942822982599, 'soft_opc': nan} step=6806




2022-04-20 17:53.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.37 [info     ] FQE_20220420175309: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001535142760678946, 'time_algorithm_update': 0.0033466054732541003, 'loss': 0.008576265149833971, 'time_step': 0.0035712288086672864, 'init_value': -2.819727659225464, 'ave_value': -1.991665760421001, 'soft_opc': nan} step=6972




2022-04-20 17:53.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.38 [info     ] FQE_20220420175309: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.000150571386498141, 'time_algorithm_update': 0.003475543964340026, 'loss': 0.00916991203761076, 'time_step': 0.003697477191327566, 'init_value': -2.863847017288208, 'ave_value': -2.023923029166621, 'soft_opc': nan} step=7138




2022-04-20 17:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.38 [info     ] FQE_20220420175309: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001558079776993717, 'time_algorithm_update': 0.0034078517592096903, 'loss': 0.009336826020882589, 'time_step': 0.0036370739879378355, 'init_value': -2.9823975563049316, 'ave_value': -2.125319853693515, 'soft_opc': nan} step=7304




2022-04-20 17:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.39 [info     ] FQE_20220420175309: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015141159655099892, 'time_algorithm_update': 0.003477587757340397, 'loss': 0.009794732939005908, 'time_step': 0.0036971396710499226, 'init_value': -3.0300469398498535, 'ave_value': -2.1431368483884916, 'soft_opc': nan} step=7470




2022-04-20 17:53.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.40 [info     ] FQE_20220420175309: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014985756701733693, 'time_algorithm_update': 0.0033637658659234106, 'loss': 0.00998307735075703, 'time_step': 0.003586092627192118, 'init_value': -3.024919033050537, 'ave_value': -2.1285428843116976, 'soft_opc': nan} step=7636




2022-04-20 17:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.40 [info     ] FQE_20220420175309: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015349991350288852, 'time_algorithm_update': 0.0034481746604643673, 'loss': 0.009985322432861808, 'time_step': 0.0036756949252392873, 'init_value': -3.0778493881225586, 'ave_value': -2.1697122472497794, 'soft_opc': nan} step=7802




2022-04-20 17:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.41 [info     ] FQE_20220420175309: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015226760542536355, 'time_algorithm_update': 0.0034426795430930264, 'loss': 0.0104081994701031, 'time_step': 0.0036653380796133755, 'init_value': -3.138741970062256, 'ave_value': -2.217638179901484, 'soft_opc': nan} step=7968




2022-04-20 17:53.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.42 [info     ] FQE_20220420175309: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015240117727992046, 'time_algorithm_update': 0.0035094080201114515, 'loss': 0.010790266083617953, 'time_step': 0.0037315566855740836, 'init_value': -3.2521812915802, 'ave_value': -2.3196858099854745, 'soft_opc': nan} step=8134




2022-04-20 17:53.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 17:53.42 [info     ] FQE_20220420175309: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015443204397178558, 'time_algorithm_update': 0.003461728613060641, 'loss': 0.010990966542719314, 'time_step': 0.003690903445324266, 'init_value': -3.2501089572906494, 'ave_value': -2.314191163015795, 'soft_opc': nan} step=8300




2022-04-20 17:53.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175309/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 17:53.43 [debug    ] RoundIterator is selected.
2022-04-20 17:53.43 [info     ] Directory is created at d3rlpy_logs/FQE_20220420175343
2022-04-20 17:53.43 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:53.43 [debug    ] Building models...
2022-04-20 17:53.43 [debug    ] Models have been built.
2022-04-20 17:53.43 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420175343/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 17:53.45 [info     ] FQE_20220420175343: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016178010215221996, 'time_algorithm_update': 0.0035023104976600323, 'loss': 0.021406429019612327, 'time_step': 0.0037351225463437363, 'init_value': -1.2943909168243408, 'ave_value': -1.25691443723141, 'soft_opc': nan} step=355




2022-04-20 17:53.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.46 [info     ] FQE_20220420175343: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016119043591996315, 'time_algorithm_update': 0.0034673663931833187, 'loss': 0.021239987086557166, 'time_step': 0.0036970353462326693, 'init_value': -2.522183656692505, 'ave_value': -2.481949101359688, 'soft_opc': nan} step=710




2022-04-20 17:53.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.47 [info     ] FQE_20220420175343: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.0001604785381908148, 'time_algorithm_update': 0.0034528268894679108, 'loss': 0.02251466653296645, 'time_step': 0.0036831446096930705, 'init_value': -3.3227341175079346, 'ave_value': -3.224917910580776, 'soft_opc': nan} step=1065




2022-04-20 17:53.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.49 [info     ] FQE_20220420175343: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016273377646862621, 'time_algorithm_update': 0.0033872100668893733, 'loss': 0.025703523297544936, 'time_step': 0.003619856229970153, 'init_value': -4.360270977020264, 'ave_value': -4.257886418259129, 'soft_opc': nan} step=1420




2022-04-20 17:53.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.50 [info     ] FQE_20220420175343: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016148258262956646, 'time_algorithm_update': 0.0034430080736187144, 'loss': 0.03076286699465463, 'time_step': 0.00367710422462141, 'init_value': -5.1242146492004395, 'ave_value': -5.051609921087169, 'soft_opc': nan} step=1775




2022-04-20 17:53.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.52 [info     ] FQE_20220420175343: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016238521522199604, 'time_algorithm_update': 0.0034667733689429056, 'loss': 0.03732157244911076, 'time_step': 0.0037009494405397226, 'init_value': -5.999953269958496, 'ave_value': -6.058808724453704, 'soft_opc': nan} step=2130




2022-04-20 17:53.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.53 [info     ] FQE_20220420175343: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00016303331079617353, 'time_algorithm_update': 0.0034169620191547233, 'loss': 0.04399460129125018, 'time_step': 0.0036529205214809366, 'init_value': -6.68456506729126, 'ave_value': -6.911322881049324, 'soft_opc': nan} step=2485




2022-04-20 17:53.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.54 [info     ] FQE_20220420175343: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016400847636478049, 'time_algorithm_update': 0.0034003210739350655, 'loss': 0.054421514128400406, 'time_step': 0.0036340968709596445, 'init_value': -7.5002055168151855, 'ave_value': -7.944605603702894, 'soft_opc': nan} step=2840




2022-04-20 17:53.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.56 [info     ] FQE_20220420175343: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001619399433404627, 'time_algorithm_update': 0.0034175201201103104, 'loss': 0.062332185638517563, 'time_step': 0.0036510521257427375, 'init_value': -8.145660400390625, 'ave_value': -8.908950317289532, 'soft_opc': nan} step=3195




2022-04-20 17:53.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.57 [info     ] FQE_20220420175343: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016124013444067725, 'time_algorithm_update': 0.003485721937367614, 'loss': 0.07666890551139352, 'time_step': 0.003720085385819556, 'init_value': -9.0106201171875, 'ave_value': -10.068134470550557, 'soft_opc': nan} step=3550




2022-04-20 17:53.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:53.59 [info     ] FQE_20220420175343: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016239193123830875, 'time_algorithm_update': 0.0034196853637695314, 'loss': 0.08708766546677536, 'time_step': 0.0036545249777780453, 'init_value': -9.861936569213867, 'ave_value': -11.208865308209276, 'soft_opc': nan} step=3905




2022-04-20 17:53.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.00 [info     ] FQE_20220420175343: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00016340537809989822, 'time_algorithm_update': 0.0034932129819628217, 'loss': 0.10180793804129665, 'time_step': 0.0037286597238460058, 'init_value': -10.349220275878906, 'ave_value': -11.81680105594748, 'soft_opc': nan} step=4260




2022-04-20 17:54.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.01 [info     ] FQE_20220420175343: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016034018825477277, 'time_algorithm_update': 0.0034354639724946357, 'loss': 0.11350440827631195, 'time_step': 0.0036662142041703344, 'init_value': -11.476142883300781, 'ave_value': -13.124981844839443, 'soft_opc': nan} step=4615




2022-04-20 17:54.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.03 [info     ] FQE_20220420175343: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00015889020033285652, 'time_algorithm_update': 0.0035066752366616693, 'loss': 0.128717327301561, 'time_step': 0.0037335013000058454, 'init_value': -11.951106071472168, 'ave_value': -13.71541864703214, 'soft_opc': nan} step=4970




2022-04-20 17:54.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.04 [info     ] FQE_20220420175343: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016345776302713742, 'time_algorithm_update': 0.003421301908896003, 'loss': 0.1413420292115967, 'time_step': 0.003657017963033327, 'init_value': -12.715629577636719, 'ave_value': -14.654972593695348, 'soft_opc': nan} step=5325




2022-04-20 17:54.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.06 [info     ] FQE_20220420175343: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00015975320842904103, 'time_algorithm_update': 0.003487533918568786, 'loss': 0.15161068775809147, 'time_step': 0.0037161733063173965, 'init_value': -13.32147216796875, 'ave_value': -15.478000975514318, 'soft_opc': nan} step=5680




2022-04-20 17:54.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.07 [info     ] FQE_20220420175343: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016369551000460772, 'time_algorithm_update': 0.003444684391290369, 'loss': 0.16612099666200894, 'time_step': 0.003679592508665273, 'init_value': -14.07749080657959, 'ave_value': -16.26459147988446, 'soft_opc': nan} step=6035




2022-04-20 17:54.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.08 [info     ] FQE_20220420175343: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016314278186207087, 'time_algorithm_update': 0.003488049708621603, 'loss': 0.17675270289182662, 'time_step': 0.003721638800392688, 'init_value': -14.70589542388916, 'ave_value': -17.009679855128205, 'soft_opc': nan} step=6390




2022-04-20 17:54.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.10 [info     ] FQE_20220420175343: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.000161903676852374, 'time_algorithm_update': 0.003450577695604781, 'loss': 0.19093220653143567, 'time_step': 0.0036835663755175095, 'init_value': -15.354222297668457, 'ave_value': -17.830991906641074, 'soft_opc': nan} step=6745




2022-04-20 17:54.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.11 [info     ] FQE_20220420175343: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016050674545932823, 'time_algorithm_update': 0.003447175361740757, 'loss': 0.20463939208904622, 'time_step': 0.0036773916701195947, 'init_value': -15.9879789352417, 'ave_value': -18.58577679765424, 'soft_opc': nan} step=7100




2022-04-20 17:54.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.13 [info     ] FQE_20220420175343: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001597250011605276, 'time_algorithm_update': 0.0034822658753730883, 'loss': 0.2155130520725334, 'time_step': 0.0037123807719056036, 'init_value': -16.410188674926758, 'ave_value': -19.115317693548313, 'soft_opc': nan} step=7455




2022-04-20 17:54.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.14 [info     ] FQE_20220420175343: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016006953279737015, 'time_algorithm_update': 0.003494005471887723, 'loss': 0.2231838890521879, 'time_step': 0.003730013001133019, 'init_value': -16.784555435180664, 'ave_value': -19.630645187112815, 'soft_opc': nan} step=7810




2022-04-20 17:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.15 [info     ] FQE_20220420175343: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00016459411298724966, 'time_algorithm_update': 0.003500770515119526, 'loss': 0.2412693580061617, 'time_step': 0.0037400494159107477, 'init_value': -17.536779403686523, 'ave_value': -20.456053728854794, 'soft_opc': nan} step=8165




2022-04-20 17:54.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.17 [info     ] FQE_20220420175343: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016203262436557822, 'time_algorithm_update': 0.0033936715461838412, 'loss': 0.25610841705765525, 'time_step': 0.0036294849825577, 'init_value': -18.159713745117188, 'ave_value': -21.28534875327249, 'soft_opc': nan} step=8520




2022-04-20 17:54.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.18 [info     ] FQE_20220420175343: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016115081142371808, 'time_algorithm_update': 0.003425027283144669, 'loss': 0.26532503353353115, 'time_step': 0.003658314154181682, 'init_value': -18.644758224487305, 'ave_value': -21.812533803949638, 'soft_opc': nan} step=8875




2022-04-20 17:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.20 [info     ] FQE_20220420175343: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.0001644349434006382, 'time_algorithm_update': 0.003469038681245186, 'loss': 0.2881976213935815, 'time_step': 0.0037051174002633966, 'init_value': -19.221630096435547, 'ave_value': -22.618206876708122, 'soft_opc': nan} step=9230




2022-04-20 17:54.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.21 [info     ] FQE_20220420175343: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001618365166892468, 'time_algorithm_update': 0.003406480332495461, 'loss': 0.2985212627693381, 'time_step': 0.0036374622667339485, 'init_value': -19.710599899291992, 'ave_value': -23.212669090322546, 'soft_opc': nan} step=9585




2022-04-20 17:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.22 [info     ] FQE_20220420175343: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00016397623948647942, 'time_algorithm_update': 0.0034956173158027758, 'loss': 0.31297118545732866, 'time_step': 0.0037326275462835606, 'init_value': -20.378864288330078, 'ave_value': -23.915924332224723, 'soft_opc': nan} step=9940




2022-04-20 17:54.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.24 [info     ] FQE_20220420175343: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016295674821020851, 'time_algorithm_update': 0.0034371611098168602, 'loss': 0.3314258795913676, 'time_step': 0.003675442010584012, 'init_value': -20.702791213989258, 'ave_value': -24.15904393429308, 'soft_opc': nan} step=10295




2022-04-20 17:54.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.25 [info     ] FQE_20220420175343: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00016374789493184695, 'time_algorithm_update': 0.0034578202475964185, 'loss': 0.3474541001913833, 'time_step': 0.0036932266933817257, 'init_value': -21.34501838684082, 'ave_value': -24.95426161948932, 'soft_opc': nan} step=10650




2022-04-20 17:54.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.27 [info     ] FQE_20220420175343: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016081299580318826, 'time_algorithm_update': 0.003423780790517028, 'loss': 0.36260485503770096, 'time_step': 0.0036580643183748486, 'init_value': -22.32389259338379, 'ave_value': -26.1563647507087, 'soft_opc': nan} step=11005




2022-04-20 17:54.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.28 [info     ] FQE_20220420175343: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.0001598935731699769, 'time_algorithm_update': 0.003473228803822692, 'loss': 0.39139819290016736, 'time_step': 0.0037022449600864465, 'init_value': -22.599685668945312, 'ave_value': -26.77359616980436, 'soft_opc': nan} step=11360




2022-04-20 17:54.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.29 [info     ] FQE_20220420175343: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016335366477429028, 'time_algorithm_update': 0.0034870241729306503, 'loss': 0.40703637530161463, 'time_step': 0.00372120158773073, 'init_value': -22.992849349975586, 'ave_value': -27.13260189900834, 'soft_opc': nan} step=11715




2022-04-20 17:54.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.31 [info     ] FQE_20220420175343: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00016055241437025472, 'time_algorithm_update': 0.0034293671728859486, 'loss': 0.4167830929947151, 'time_step': 0.0036637554705982477, 'init_value': -23.170482635498047, 'ave_value': -27.37508782904734, 'soft_opc': nan} step=12070




2022-04-20 17:54.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.32 [info     ] FQE_20220420175343: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016144161493005887, 'time_algorithm_update': 0.003456123110274194, 'loss': 0.43197114802896974, 'time_step': 0.003689678621963716, 'init_value': -23.571990966796875, 'ave_value': -28.2169814821069, 'soft_opc': nan} step=12425




2022-04-20 17:54.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.34 [info     ] FQE_20220420175343: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001608989608119911, 'time_algorithm_update': 0.0034564152569837975, 'loss': 0.44957715675864424, 'time_step': 0.003690000319145095, 'init_value': -23.712100982666016, 'ave_value': -28.634892223915386, 'soft_opc': nan} step=12780




2022-04-20 17:54.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.35 [info     ] FQE_20220420175343: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001636095449958049, 'time_algorithm_update': 0.003538720036896182, 'loss': 0.4573452042232097, 'time_step': 0.003775657062799158, 'init_value': -24.37014389038086, 'ave_value': -29.430509476986938, 'soft_opc': nan} step=13135




2022-04-20 17:54.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.36 [info     ] FQE_20220420175343: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016236506717305787, 'time_algorithm_update': 0.00347717714981294, 'loss': 0.4847411920434572, 'time_step': 0.0037123082389294263, 'init_value': -24.407230377197266, 'ave_value': -29.86443404414939, 'soft_opc': nan} step=13490




2022-04-20 17:54.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.38 [info     ] FQE_20220420175343: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016246043460469852, 'time_algorithm_update': 0.0034961673575387873, 'loss': 0.4972413453680109, 'time_step': 0.003731944527424557, 'init_value': -24.821287155151367, 'ave_value': -30.626315777151433, 'soft_opc': nan} step=13845




2022-04-20 17:54.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.39 [info     ] FQE_20220420175343: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016014475218007263, 'time_algorithm_update': 0.003455078769737566, 'loss': 0.5048068389139125, 'time_step': 0.003685881386340504, 'init_value': -24.386350631713867, 'ave_value': -30.51773972370143, 'soft_opc': nan} step=14200




2022-04-20 17:54.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.41 [info     ] FQE_20220420175343: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.000161560488418794, 'time_algorithm_update': 0.003953775217835332, 'loss': 0.50551089769308, 'time_step': 0.004186935827765666, 'init_value': -24.929885864257812, 'ave_value': -31.268119946532575, 'soft_opc': nan} step=14555




2022-04-20 17:54.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.43 [info     ] FQE_20220420175343: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016524959617937116, 'time_algorithm_update': 0.004068005924493494, 'loss': 0.5175815508661555, 'time_step': 0.004304828778119154, 'init_value': -25.002742767333984, 'ave_value': -31.6439564441622, 'soft_opc': nan} step=14910




2022-04-20 17:54.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.44 [info     ] FQE_20220420175343: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016723417899977994, 'time_algorithm_update': 0.004461054734780755, 'loss': 0.52858677035801, 'time_step': 0.004699793667860434, 'init_value': -25.288515090942383, 'ave_value': -32.17960753901124, 'soft_opc': nan} step=15265




2022-04-20 17:54.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.46 [info     ] FQE_20220420175343: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00016587552889971665, 'time_algorithm_update': 0.004064283908252985, 'loss': 0.5418068816806649, 'time_step': 0.004302919414681448, 'init_value': -25.387210845947266, 'ave_value': -32.53102255689898, 'soft_opc': nan} step=15620




2022-04-20 17:54.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.48 [info     ] FQE_20220420175343: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016670428531270632, 'time_algorithm_update': 0.004460752514046682, 'loss': 0.5567413918149303, 'time_step': 0.004701431032637475, 'init_value': -26.08265495300293, 'ave_value': -33.3776820490259, 'soft_opc': nan} step=15975




2022-04-20 17:54.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.49 [info     ] FQE_20220420175343: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.000166419526221047, 'time_algorithm_update': 0.004108251652247469, 'loss': 0.5713567373565804, 'time_step': 0.004347828072561345, 'init_value': -26.35146141052246, 'ave_value': -34.01047706634689, 'soft_opc': nan} step=16330




2022-04-20 17:54.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.51 [info     ] FQE_20220420175343: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00016335903758734046, 'time_algorithm_update': 0.004085726805136238, 'loss': 0.5750054156958637, 'time_step': 0.004321112431270976, 'init_value': -26.175771713256836, 'ave_value': -34.218376768509856, 'soft_opc': nan} step=16685




2022-04-20 17:54.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.53 [info     ] FQE_20220420175343: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016687957333846832, 'time_algorithm_update': 0.004469686830547494, 'loss': 0.6027353087728712, 'time_step': 0.004709443911700181, 'init_value': -26.86279296875, 'ave_value': -35.10805587842197, 'soft_opc': nan} step=17040




2022-04-20 17:54.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.54 [info     ] FQE_20220420175343: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016317837674852828, 'time_algorithm_update': 0.004063362470814879, 'loss': 0.6174464220627093, 'time_step': 0.004299431787410253, 'init_value': -26.684009552001953, 'ave_value': -35.25218006671626, 'soft_opc': nan} step=17395




2022-04-20 17:54.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 17:54.56 [info     ] FQE_20220420175343: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016548398514868508, 'time_algorithm_update': 0.004428542499810877, 'loss': 0.6227425640894914, 'time_step': 0.0046684224840620875, 'init_value': -26.469449996948242, 'ave_value': -35.37373668782346, 'soft_opc': nan} step=17750




2022-04-20 17:54.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420175343/model_17750.pt
search iteration:  16
using hyper params:  [0.0023476643550042068, 0.002304921855695497, 6.798724590673883e-05, 1]
2022-04-20 17:54.56 [debug    ] RoundIterator is selected.
2022-04-20 17:54.56 [info     ] Directory is created at d3rlpy_logs/CQL_20220420175456
2022-04-20 17:54.56 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 17:54.56 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 17:54.56 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420175456/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0023476643550042068, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.05 [info     ] CQL_20220420175456: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030422350119429023, 'time_algorithm_update': 0.024989588218822814, 'temp_loss': 4.782679690255059, 'temp': 0.988556851943334, 'alpha_loss': -15.08374057178609, 'alpha': 1.0166022746186507, 'critic_loss': 19.803045183594463, 'actor_loss': -1.673491797252008, 'time_step': 0.025390058233026872, 'td_error': 2.522420382064523, 'init_value': -0.162954181432724, 'ave_value': 0.7791184552799205} step=342
2022-04-20 17:55.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.15 [info     ] CQL_20220420175456: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003075041966131556, 'time_algorithm_update': 0.025432730975903962, 'temp_loss': 4.232503315858674, 'temp': 0.966916215872904, 'alpha_loss': -9.454465401800055, 'alpha': 1.0455847731110646, 'critic_loss': 18.574754422171075, 'actor_loss': -0.8613578528928304, 'time_step': 0.025836908329300017, 'td_error': 3.5468359297824854, 'init_value': -1.8069175481796265, 'ave_value': 0.6791653373830758} step=684
2022-04-20 17:55.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.24 [info     ] CQL_20220420175456: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003115384899384794, 'time_algorithm_update': 0.025265485919707002, 'temp_loss': 3.4896492191225463, 'temp': 0.9475475886063269, 'alpha_loss': -5.338622035338864, 'alpha': 1.0670947171094125, 'critic_loss': 29.26822679084644, 'actor_loss': 0.18212414976231187, 'time_step': 0.025677337283976594, 'td_error': 3.4424464930453635, 'init_value': -3.1365675926208496, 'ave_value': 0.8270459160181853} step=1026
2022-04-20 17:55.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.33 [info     ] CQL_20220420175456: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003150973403662966, 'time_algorithm_update': 0.02569451109010574, 'temp_loss': 2.925271763439067, 'temp': 0.9302948727245219, 'alpha_loss': -2.5659729368679707, 'alpha': 1.0818271706675926, 'critic_loss': 43.83940628676387, 'actor_loss': 1.0816513908188246, 'time_step': 0.026109323166964345, 'td_error': 4.332140253911116, 'init_value': -5.107811450958252, 'ave_value': 0.4170525034913072} step=1368
2022-04-20 17:55.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.42 [info     ] CQL_20220420175456: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003094958980181064, 'time_algorithm_update': 0.025611059707507752, 'temp_loss': 2.5111649085206595, 'temp': 0.9143259047764783, 'alpha_loss': -0.4538178816563668, 'alpha': 1.0888478972061335, 'critic_loss': 59.04941295601471, 'actor_loss': 1.877776355945576, 'time_step': 0.026018427826507748, 'td_error': 5.031227029558929, 'init_value': -6.601534366607666, 'ave_value': 0.2296558348552601} step=1710
2022-04-20 17:55.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:55.52 [info     ] CQL_20220420175456: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003122181920280233, 'time_algorithm_update': 0.02516954405265942, 'temp_loss': 2.1897192590417918, 'temp': 0.8992859421417727, 'alpha_loss': 1.1629060172633818, 'alpha': 1.086232580636677, 'critic_loss': 74.47442140634995, 'actor_loss': 2.6259506071171566, 'time_step': 0.025579234312849437, 'td_error': 5.45622480057199, 'init_value': -8.951489448547363, 'ave_value': -0.3802549210284744} step=2052
2022-04-20 17:55.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.00 [info     ] CQL_20220420175456: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0002868050023129112, 'time_algorithm_update': 0.024191415100766903, 'temp_loss': 1.9221191169225682, 'temp': 0.8848961398266909, 'alpha_loss': 2.438791502920682, 'alpha': 1.0734666812489604, 'critic_loss': 90.38385288617765, 'actor_loss': 3.395162200892878, 'time_step': 0.024568770364014027, 'td_error': 5.950546801793579, 'init_value': -10.808816909790039, 'ave_value': -0.21513794649023193} step=2394
2022-04-20 17:56.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.10 [info     ] CQL_20220420175456: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00031024113036038584, 'time_algorithm_update': 0.02542850915451496, 'temp_loss': 1.7042914897377728, 'temp': 0.8710638939985755, 'alpha_loss': 3.417275568372325, 'alpha': 1.0499535359834369, 'critic_loss': 106.87420080976877, 'actor_loss': 4.149419967455474, 'time_step': 0.025837809718840303, 'td_error': 7.00998536095981, 'init_value': -12.735051155090332, 'ave_value': -0.6549142162219899} step=2736
2022-04-20 17:56.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.19 [info     ] CQL_20220420175456: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0002922405276382179, 'time_algorithm_update': 0.02458291304738898, 'temp_loss': 1.5137013029633908, 'temp': 0.8577531316475562, 'alpha_loss': 4.15417009104065, 'alpha': 1.0183907642699124, 'critic_loss': 123.30724296792906, 'actor_loss': 4.879288880797158, 'time_step': 0.024967128770393237, 'td_error': 8.013212372480796, 'init_value': -14.69672966003418, 'ave_value': -1.3654616700367885} step=3078
2022-04-20 17:56.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.28 [info     ] CQL_20220420175456: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003133538173653229, 'time_algorithm_update': 0.02537800484930563, 'temp_loss': 1.318467333477143, 'temp': 0.8448819096325434, 'alpha_loss': 4.660123499164804, 'alpha': 0.9835805938257809, 'critic_loss': 139.4796205041004, 'actor_loss': 5.57346889359212, 'time_step': 0.025789720273157308, 'td_error': 9.046751474249607, 'init_value': -17.12807273864746, 'ave_value': -1.887111376384357} step=3420
2022-04-20 17:56.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.37 [info     ] CQL_20220420175456: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00030745191183703685, 'time_algorithm_update': 0.025634890411332337, 'temp_loss': 1.1786032395753248, 'temp': 0.832481623915901, 'alpha_loss': 5.000076736623083, 'alpha': 0.9474384979894984, 'critic_loss': 155.029002942537, 'actor_loss': 6.202029745829733, 'time_step': 0.02604020826997813, 'td_error': 8.925702700519754, 'init_value': -18.074602127075195, 'ave_value': -2.207163097987304} step=3762
2022-04-20 17:56.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.46 [info     ] CQL_20220420175456: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00030729645176937704, 'time_algorithm_update': 0.025306628461469683, 'temp_loss': 1.0507476003895029, 'temp': 0.8204296544978493, 'alpha_loss': 5.255702734690661, 'alpha': 0.912258336592836, 'critic_loss': 169.82972097118, 'actor_loss': 6.875861624528093, 'time_step': 0.025712886051824917, 'td_error': 9.705439945929168, 'init_value': -21.36220932006836, 'ave_value': -3.309609425384719} step=4104
2022-04-20 17:56.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:56.56 [info     ] CQL_20220420175456: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003133817025792529, 'time_algorithm_update': 0.025804131351716338, 'temp_loss': 0.9120313996634288, 'temp': 0.8088600762754853, 'alpha_loss': 5.438294409311306, 'alpha': 0.8784630805776831, 'critic_loss': 184.1628271180984, 'actor_loss': 7.514332624206766, 'time_step': 0.026217961171914262, 'td_error': 9.7224260595167, 'init_value': -22.01363754272461, 'ave_value': -2.318991598696322} step=4446
2022-04-20 17:56.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.05 [info     ] CQL_20220420175456: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003149000524777418, 'time_algorithm_update': 0.02565267769216794, 'temp_loss': 0.8027103783791525, 'temp': 0.7976405387036285, 'alpha_loss': 5.6075434245561295, 'alpha': 0.8464491423110516, 'critic_loss': 198.6402706123932, 'actor_loss': 8.190006682747288, 'time_step': 0.026064503959744995, 'td_error': 11.348712678820451, 'init_value': -25.517292022705078, 'ave_value': -4.12534039611752} step=4788
2022-04-20 17:57.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.15 [info     ] CQL_20220420175456: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00031078140638027973, 'time_algorithm_update': 0.025851310345164517, 'temp_loss': 0.6975302068810714, 'temp': 0.7867953930681909, 'alpha_loss': 5.712718186322709, 'alpha': 0.8159476872424634, 'critic_loss': 212.09525763082226, 'actor_loss': 8.734655019832633, 'time_step': 0.026262991609629135, 'td_error': 11.652589649729054, 'init_value': -25.72402000427246, 'ave_value': -3.743477433603089} step=5130
2022-04-20 17:57.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.24 [info     ] CQL_20220420175456: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003086558559484649, 'time_algorithm_update': 0.02551023235097963, 'temp_loss': 0.606869105325892, 'temp': 0.7765265642202388, 'alpha_loss': 5.887513436769185, 'alpha': 0.7866975332212727, 'critic_loss': 225.91070355867086, 'actor_loss': 9.503910267562198, 'time_step': 0.025916713023046305, 'td_error': 11.977236629213188, 'init_value': -27.755435943603516, 'ave_value': -4.505707142455083} step=5472
2022-04-20 17:57.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.33 [info     ] CQL_20220420175456: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003129404190688105, 'time_algorithm_update': 0.025629521810520462, 'temp_loss': 0.5098840629366058, 'temp': 0.7668067119274921, 'alpha_loss': 5.939307585097196, 'alpha': 0.7586505822619499, 'critic_loss': 239.4914175111648, 'actor_loss': 10.07813323032089, 'time_step': 0.02604223622216119, 'td_error': 13.087917343400065, 'init_value': -31.046707153320312, 'ave_value': -5.732144789964229} step=5814
2022-04-20 17:57.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.42 [info     ] CQL_20220420175456: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003126260132817497, 'time_algorithm_update': 0.02461563774019654, 'temp_loss': 0.43686754866475946, 'temp': 0.7574434346622891, 'alpha_loss': 5.944221451965689, 'alpha': 0.7322230431420064, 'critic_loss': 252.41702640823453, 'actor_loss': 10.70736757635373, 'time_step': 0.025030624099642212, 'td_error': 13.26741436323173, 'init_value': -31.637928009033203, 'ave_value': -5.148848159130629} step=6156
2022-04-20 17:57.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:57.51 [info     ] CQL_20220420175456: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00030941224237631635, 'time_algorithm_update': 0.024040418758726957, 'temp_loss': 0.3742945098171109, 'temp': 0.7487360380197826, 'alpha_loss': 5.92517772334361, 'alpha': 0.7069772748570693, 'critic_loss': 265.52014820478115, 'actor_loss': 11.293670491168374, 'time_step': 0.024447061165034423, 'td_error': 14.091020143691436, 'init_value': -33.480464935302734, 'ave_value': -5.6070374034063235} step=6498
2022-04-20 17:57.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.00 [info     ] CQL_20220420175456: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00031049558293749714, 'time_algorithm_update': 0.024034695318567823, 'temp_loss': 0.3128496094808331, 'temp': 0.7406632098538136, 'alpha_loss': 5.892295633143152, 'alpha': 0.6828405895428351, 'critic_loss': 277.25861586185925, 'actor_loss': 11.881870407807199, 'time_step': 0.024445023453026488, 'td_error': 14.52102163300987, 'init_value': -35.03742218017578, 'ave_value': -6.088162449780885} step=6840
2022-04-20 17:58.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.08 [info     ] CQL_20220420175456: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003084676307544374, 'time_algorithm_update': 0.02421974578098944, 'temp_loss': 0.2573033852491811, 'temp': 0.7329340962290066, 'alpha_loss': 5.8383744439186405, 'alpha': 0.6599227338157899, 'critic_loss': 289.10021954809713, 'actor_loss': 12.485124250601606, 'time_step': 0.024627289576837193, 'td_error': 15.742659967023426, 'init_value': -37.40996170043945, 'ave_value': -6.973741790072338} step=7182
2022-04-20 17:58.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.17 [info     ] CQL_20220420175456: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003106893851743107, 'time_algorithm_update': 0.024130807982550725, 'temp_loss': 0.20811111286108258, 'temp': 0.7259161693659443, 'alpha_loss': 5.751403160262526, 'alpha': 0.6381123677680367, 'critic_loss': 301.22004913865476, 'actor_loss': 13.070175191812348, 'time_step': 0.024539536899990506, 'td_error': 16.428317864532975, 'init_value': -38.88530731201172, 'ave_value': -7.167550062182787} step=7524
2022-04-20 17:58.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.26 [info     ] CQL_20220420175456: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00031072912160416097, 'time_algorithm_update': 0.02383296880108571, 'temp_loss': 0.15708143799429575, 'temp': 0.7198117176691691, 'alpha_loss': 5.6691382921230025, 'alpha': 0.6172336941225487, 'critic_loss': 311.57952889782644, 'actor_loss': 13.581282020312303, 'time_step': 0.024241986330489667, 'td_error': 17.466216279457036, 'init_value': -40.385009765625, 'ave_value': -7.2788914910146785} step=7866
2022-04-20 17:58.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.35 [info     ] CQL_20220420175456: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003078625216121562, 'time_algorithm_update': 0.02418604859134607, 'temp_loss': 0.12637980923884445, 'temp': 0.7144779456637756, 'alpha_loss': 5.578065410692092, 'alpha': 0.5971274926648502, 'critic_loss': 321.85229831271704, 'actor_loss': 14.076462525373314, 'time_step': 0.02459190184609932, 'td_error': 17.28821636084961, 'init_value': -42.75004196166992, 'ave_value': -8.869062417777808} step=8208
2022-04-20 17:58.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.44 [info     ] CQL_20220420175456: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003125576945076212, 'time_algorithm_update': 0.02422549431784111, 'temp_loss': 0.10620631942558184, 'temp': 0.7095105553928175, 'alpha_loss': 5.480935736009252, 'alpha': 0.5778815673108686, 'critic_loss': 332.40000023200497, 'actor_loss': 14.61461977930794, 'time_step': 0.024638927470870882, 'td_error': 18.120246371709584, 'init_value': -43.32695770263672, 'ave_value': -7.996968624656265} step=8550
2022-04-20 17:58.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:58.52 [info     ] CQL_20220420175456: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00031099891104893377, 'time_algorithm_update': 0.02425489718453926, 'temp_loss': 0.04294481796719003, 'temp': 0.7056994793707865, 'alpha_loss': 5.405476875472487, 'alpha': 0.5594092373959503, 'critic_loss': 341.8824770743387, 'actor_loss': 15.055088996887207, 'time_step': 0.024665866678918315, 'td_error': 18.909230901408517, 'init_value': -44.899696350097656, 'ave_value': -9.38320683354193} step=8892
2022-04-20 17:58.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.01 [info     ] CQL_20220420175456: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00030728111490171554, 'time_algorithm_update': 0.024301053487766556, 'temp_loss': 0.043092996557379325, 'temp': 0.7041779268554776, 'alpha_loss': 5.2752063846030435, 'alpha': 0.5414205444486517, 'critic_loss': 351.6126639382881, 'actor_loss': 15.559374499739262, 'time_step': 0.02470643408814369, 'td_error': 19.627473803056354, 'init_value': -47.15467834472656, 'ave_value': -9.274929281795346} step=9234
2022-04-20 17:59.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.10 [info     ] CQL_20220420175456: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003098451603225797, 'time_algorithm_update': 0.024233306360523604, 'temp_loss': 0.009606936766782351, 'temp': 0.7020639900924169, 'alpha_loss': 5.183671885066563, 'alpha': 0.5242898833681966, 'critic_loss': 361.2217980100397, 'actor_loss': 16.108135396277, 'time_step': 0.02464062568039922, 'td_error': 20.979993326110264, 'init_value': -48.66619110107422, 'ave_value': -9.560055652674254} step=9576
2022-04-20 17:59.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.19 [info     ] CQL_20220420175456: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00031124220954047307, 'time_algorithm_update': 0.024147045542622172, 'temp_loss': 0.02865500436627377, 'temp': 0.7011025795462535, 'alpha_loss': 5.072556584899188, 'alpha': 0.5077082334380401, 'critic_loss': 370.33949520713406, 'actor_loss': 16.454484657934536, 'time_step': 0.02455984082138329, 'td_error': 21.564396512050738, 'init_value': -49.424102783203125, 'ave_value': -9.207153867536837} step=9918
2022-04-20 17:59.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.28 [info     ] CQL_20220420175456: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003089835072121425, 'time_algorithm_update': 0.02416280975118715, 'temp_loss': 0.0075873734091806136, 'temp': 0.6999316469967714, 'alpha_loss': 4.934747978260643, 'alpha': 0.4916815889334818, 'critic_loss': 379.99207194367347, 'actor_loss': 16.927168352562084, 'time_step': 0.024568532642565276, 'td_error': 21.736338594771517, 'init_value': -52.153343200683594, 'ave_value': -10.792688873066558} step=10260
2022-04-20 17:59.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.37 [info     ] CQL_20220420175456: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00031309657626681856, 'time_algorithm_update': 0.025169315393905194, 'temp_loss': 0.003799173114984705, 'temp': 0.6999291805495993, 'alpha_loss': 4.808952523253814, 'alpha': 0.47627581281271597, 'critic_loss': 389.1661351075646, 'actor_loss': 17.42439353535747, 'time_step': 0.025583999198779725, 'td_error': 22.850463748096313, 'init_value': -53.1837272644043, 'ave_value': -10.611360916892687} step=10602
2022-04-20 17:59.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.46 [info     ] CQL_20220420175456: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00030848017910070587, 'time_algorithm_update': 0.025231353023595977, 'temp_loss': -0.003353048551675172, 'temp': 0.6996303815939273, 'alpha_loss': 4.680591640416642, 'alpha': 0.461546448849098, 'critic_loss': 397.91442532009546, 'actor_loss': 17.855706195385135, 'time_step': 0.025636508450870624, 'td_error': 23.355541323929184, 'init_value': -55.16776657104492, 'ave_value': -11.290640143816535} step=10944
2022-04-20 17:59.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 17:59.55 [info     ] CQL_20220420175456: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00031321578555636935, 'time_algorithm_update': 0.025200501520034165, 'temp_loss': -0.00034935685822315383, 'temp': 0.6997950258310776, 'alpha_loss': 4.552297664664642, 'alpha': 0.4472239626604214, 'critic_loss': 406.3227277610734, 'actor_loss': 18.228532462092172, 'time_step': 0.02560890139194957, 'td_error': 24.242241891644483, 'init_value': -56.569244384765625, 'ave_value': -11.2356207501405} step=11286
2022-04-20 17:59.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.04 [info     ] CQL_20220420175456: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00031626642796031215, 'time_algorithm_update': 0.0252857047911973, 'temp_loss': -0.028159116362139844, 'temp': 0.7003834880234903, 'alpha_loss': 4.443626838817931, 'alpha': 0.4333733011756027, 'critic_loss': 415.9601297657392, 'actor_loss': 18.730964841898423, 'time_step': 0.025703531956812093, 'td_error': 25.024414438942095, 'init_value': -57.801177978515625, 'ave_value': -11.761743214302236} step=11628
2022-04-20 18:00.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.14 [info     ] CQL_20220420175456: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003174146016438802, 'time_algorithm_update': 0.027558494032474987, 'temp_loss': -0.02739616698035371, 'temp': 0.7023280886878744, 'alpha_loss': 4.335144726853621, 'alpha': 0.41999183150759917, 'critic_loss': 423.87776915790045, 'actor_loss': 19.023120675867762, 'time_step': 0.027977139629118623, 'td_error': 25.301694615931094, 'init_value': -59.36865234375, 'ave_value': -12.001128822983922} step=11970
2022-04-20 18:00.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.24 [info     ] CQL_20220420175456: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00031813473729362267, 'time_algorithm_update': 0.027052373216863265, 'temp_loss': -0.012434905038177096, 'temp': 0.7044666087069706, 'alpha_loss': 4.21871989163739, 'alpha': 0.40697561683710554, 'critic_loss': 432.79644704004477, 'actor_loss': 19.519255507062052, 'time_step': 0.027475507635819286, 'td_error': 26.52714389404755, 'init_value': -60.369117736816406, 'ave_value': -12.350021847266335} step=12312
2022-04-20 18:00.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.34 [info     ] CQL_20220420175456: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00031755960475631623, 'time_algorithm_update': 0.02738400091204727, 'temp_loss': 0.0049668430072958005, 'temp': 0.7053979321529991, 'alpha_loss': 4.085510766994187, 'alpha': 0.3944517564878129, 'critic_loss': 442.07595236259596, 'actor_loss': 19.97004279337431, 'time_step': 0.02780299158821329, 'td_error': 27.630598612624578, 'init_value': -61.56553268432617, 'ave_value': -12.292563347113026} step=12654
2022-04-20 18:00.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.44 [info     ] CQL_20220420175456: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00031372817636233325, 'time_algorithm_update': 0.025860577996014156, 'temp_loss': -0.019687556585416807, 'temp': 0.705459010357048, 'alpha_loss': 3.9587617624572844, 'alpha': 0.38245010158123327, 'critic_loss': 449.5741524501154, 'actor_loss': 20.197011451275028, 'time_step': 0.026274139421028003, 'td_error': 28.204066683297004, 'init_value': -63.90299606323242, 'ave_value': -13.540056881496499} step=12996
2022-04-20 18:00.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:00.53 [info     ] CQL_20220420175456: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003206695032398603, 'time_algorithm_update': 0.026115061246860794, 'temp_loss': 0.013366182568913315, 'temp': 0.7055115060150972, 'alpha_loss': 3.8437517609512595, 'alpha': 0.3707638734091095, 'critic_loss': 458.8114245676855, 'actor_loss': 20.701538596236915, 'time_step': 0.02653626531188251, 'td_error': 28.584171150720298, 'init_value': -64.53746795654297, 'ave_value': -13.421250667013563} step=13338
2022-04-20 18:00.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.03 [info     ] CQL_20220420175456: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003122063408121031, 'time_algorithm_update': 0.0258759169550667, 'temp_loss': -0.000724655772118192, 'temp': 0.705206872252693, 'alpha_loss': 3.732672037436948, 'alpha': 0.359482005238533, 'critic_loss': 466.90407307942706, 'actor_loss': 21.07229095174555, 'time_step': 0.026288390159606934, 'td_error': 29.50537684611958, 'init_value': -66.9576187133789, 'ave_value': -14.184724364656585} step=13680
2022-04-20 18:01.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.12 [info     ] CQL_20220420175456: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00031485613326580206, 'time_algorithm_update': 0.02592335597813478, 'temp_loss': -0.004367345383074897, 'temp': 0.7050720315579085, 'alpha_loss': 3.6140595625715646, 'alpha': 0.34853091374126793, 'critic_loss': 475.78684488932294, 'actor_loss': 21.5279250340155, 'time_step': 0.026337509266814295, 'td_error': 30.266741332294544, 'init_value': -67.2114028930664, 'ave_value': -13.96775963913213} step=14022
2022-04-20 18:01.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.22 [info     ] CQL_20220420175456: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00031668052338717275, 'time_algorithm_update': 0.026195188711958323, 'temp_loss': 0.015377234038488384, 'temp': 0.7053556980793936, 'alpha_loss': 3.4946959740237187, 'alpha': 0.3379549954021186, 'critic_loss': 483.6733953464798, 'actor_loss': 21.863352357295522, 'time_step': 0.02661264709561889, 'td_error': 31.062998081566516, 'init_value': -67.94532775878906, 'ave_value': -13.411571071072741} step=14364
2022-04-20 18:01.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.31 [info     ] CQL_20220420175456: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003162566681354367, 'time_algorithm_update': 0.025945625109979283, 'temp_loss': -0.01687426017107148, 'temp': 0.7051818710670137, 'alpha_loss': 3.4042225490536606, 'alpha': 0.3276780306944373, 'critic_loss': 490.5279037743284, 'actor_loss': 22.173867013719345, 'time_step': 0.02636363352948462, 'td_error': 31.795527535498856, 'init_value': -71.45750427246094, 'ave_value': -15.308319903085899} step=14706
2022-04-20 18:01.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.41 [info     ] CQL_20220420175456: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003181772622448659, 'time_algorithm_update': 0.02628957458406861, 'temp_loss': 0.0024311596593051626, 'temp': 0.7056248679495695, 'alpha_loss': 3.2850105257062188, 'alpha': 0.3177884266390438, 'critic_loss': 497.60857467205204, 'actor_loss': 22.529602945896617, 'time_step': 0.026710743792572912, 'td_error': 32.533719125032675, 'init_value': -71.07569885253906, 'ave_value': -14.316830643340275} step=15048
2022-04-20 18:01.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:01.50 [info     ] CQL_20220420175456: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032179536875228434, 'time_algorithm_update': 0.025810984143039638, 'temp_loss': 0.02789407328577244, 'temp': 0.7041373031529766, 'alpha_loss': 3.1777868720523097, 'alpha': 0.30820128975207345, 'critic_loss': 505.6945871275071, 'actor_loss': 22.885228460992288, 'time_step': 0.02623463095280162, 'td_error': 33.1403477237843, 'init_value': -73.94864654541016, 'ave_value': -15.644662290591377} step=15390
2022-04-20 18:01.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.00 [info     ] CQL_20220420175456: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003158586067065858, 'time_algorithm_update': 0.026256911238731695, 'temp_loss': 0.02957493311872608, 'temp': 0.702147510665202, 'alpha_loss': 3.0811949696457175, 'alpha': 0.29887664884503123, 'critic_loss': 514.0972799557692, 'actor_loss': 23.29978084285357, 'time_step': 0.026673998749046995, 'td_error': 34.03868838668996, 'init_value': -72.81657409667969, 'ave_value': -15.093106440735292} step=15732
2022-04-20 18:02.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.09 [info     ] CQL_20220420175456: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003150861862807246, 'time_algorithm_update': 0.026009667686551635, 'temp_loss': 0.027687215284384482, 'temp': 0.7001534660308681, 'alpha_loss': 2.9819936016846817, 'alpha': 0.28983881294030195, 'critic_loss': 521.2340103952508, 'actor_loss': 23.59879021059003, 'time_step': 0.02642387674565901, 'td_error': 34.28939685520419, 'init_value': -75.3016357421875, 'ave_value': -15.873270285800771} step=16074
2022-04-20 18:02.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.18 [info     ] CQL_20220420175456: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00031331826371756215, 'time_algorithm_update': 0.025928115984152632, 'temp_loss': 0.012487130718883018, 'temp': 0.6983013228017684, 'alpha_loss': 2.885515684621376, 'alpha': 0.28116951490703385, 'critic_loss': 527.9085902163857, 'actor_loss': 23.96981482477913, 'time_step': 0.02634088895474261, 'td_error': 35.07873602048866, 'init_value': -76.96500396728516, 'ave_value': -16.344047350588145} step=16416
2022-04-20 18:02.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.28 [info     ] CQL_20220420175456: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00032107174745080066, 'time_algorithm_update': 0.02622896188880965, 'temp_loss': 0.04796048230900053, 'temp': 0.6953211229795601, 'alpha_loss': 2.7794612541533352, 'alpha': 0.2727179636209332, 'critic_loss': 536.1727136087696, 'actor_loss': 24.29687615723638, 'time_step': 0.026651743559809458, 'td_error': 35.724609474395194, 'init_value': -77.85074615478516, 'ave_value': -16.619206912130924} step=16758
2022-04-20 18:02.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:02.37 [info     ] CQL_20220420175456: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003178245142886513, 'time_algorithm_update': 0.025930321704574495, 'temp_loss': 0.045732739125514586, 'temp': 0.6924000943613331, 'alpha_loss': 2.689366673516948, 'alpha': 0.2645307727027358, 'critic_loss': 542.7220163624189, 'actor_loss': 24.571555422063458, 'time_step': 0.026347298371164424, 'td_error': 36.61232093245577, 'init_value': -79.39083099365234, 'ave_value': -17.142334294705776} step=17100
2022-04-20 18:02.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420175456/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:02.39 [info     ] FQE_20220420180238: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016039394470582525, 'time_algorithm_update': 0.004584657140524991, 'loss': 0.0052585862513844775, 'time_step': 0.004817671086414751, 'init_value': -0.14956197142601013, 'ave_value': -0.12682017342330099, 'soft_opc': nan} step=166




2022-04-20 18:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.39 [info     ] FQE_20220420180238: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016018712376973717, 'time_algorithm_update': 0.004527538655752159, 'loss': 0.004044885106149118, 'time_step': 0.004758984209543251, 'init_value': -0.28345102071762085, 'ave_value': -0.22169170840685298, 'soft_opc': nan} step=332




2022-04-20 18:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.40 [info     ] FQE_20220420180238: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015923919447933334, 'time_algorithm_update': 0.0038202455244868636, 'loss': 0.003764038319918271, 'time_step': 0.004049272422330925, 'init_value': -0.3281143605709076, 'ave_value': -0.2388735182872797, 'soft_opc': nan} step=498




2022-04-20 18:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.41 [info     ] FQE_20220420180238: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016396591462284685, 'time_algorithm_update': 0.0046012257955160485, 'loss': 0.003976885807217963, 'time_step': 0.004842021379126124, 'init_value': -0.39729297161102295, 'ave_value': -0.2663728182002701, 'soft_opc': nan} step=664




2022-04-20 18:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.42 [info     ] FQE_20220420180238: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016691885798810477, 'time_algorithm_update': 0.004622404833874071, 'loss': 0.004018052858412714, 'time_step': 0.004862878696028009, 'init_value': -0.48151350021362305, 'ave_value': -0.3188091451646173, 'soft_opc': nan} step=830




2022-04-20 18:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.43 [info     ] FQE_20220420180238: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015723705291748047, 'time_algorithm_update': 0.0038027691553874188, 'loss': 0.003870913126303938, 'time_step': 0.00403052640248494, 'init_value': -0.5427471399307251, 'ave_value': -0.3483620811056614, 'soft_opc': nan} step=996




2022-04-20 18:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.44 [info     ] FQE_20220420180238: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015890023794518896, 'time_algorithm_update': 0.004649275756744017, 'loss': 0.003921183068108891, 'time_step': 0.004880181278090879, 'init_value': -0.5877739787101746, 'ave_value': -0.3512636807428354, 'soft_opc': nan} step=1162




2022-04-20 18:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.44 [info     ] FQE_20220420180238: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001607300287269684, 'time_algorithm_update': 0.004632168505565229, 'loss': 0.003995048720950642, 'time_step': 0.004865211176585002, 'init_value': -0.663453996181488, 'ave_value': -0.3970183425339627, 'soft_opc': nan} step=1328




2022-04-20 18:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.45 [info     ] FQE_20220420180238: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016369302588773062, 'time_algorithm_update': 0.004569136952779379, 'loss': 0.004001235344632338, 'time_step': 0.004810535764119711, 'init_value': -0.7153012156486511, 'ave_value': -0.41712146082406376, 'soft_opc': nan} step=1494




2022-04-20 18:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.46 [info     ] FQE_20220420180238: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001615400773933135, 'time_algorithm_update': 0.003931059894791569, 'loss': 0.004153921020621187, 'time_step': 0.004165861980024591, 'init_value': -0.8108834028244019, 'ave_value': -0.47709073422996845, 'soft_opc': nan} step=1660




2022-04-20 18:02.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.47 [info     ] FQE_20220420180238: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001653562109154391, 'time_algorithm_update': 0.0046094081488000345, 'loss': 0.004096566424536777, 'time_step': 0.004852924002222268, 'init_value': -0.8467415571212769, 'ave_value': -0.4997502449430055, 'soft_opc': nan} step=1826




2022-04-20 18:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.48 [info     ] FQE_20220420180238: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001627091901848115, 'time_algorithm_update': 0.004661957901644419, 'loss': 0.004235192000058042, 'time_step': 0.004899073796099927, 'init_value': -0.9236037731170654, 'ave_value': -0.5457768282609748, 'soft_opc': nan} step=1992




2022-04-20 18:02.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.49 [info     ] FQE_20220420180238: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016237454242016896, 'time_algorithm_update': 0.0037721309317163675, 'loss': 0.004494785652383713, 'time_step': 0.0040132999420166016, 'init_value': -1.008643388748169, 'ave_value': -0.6048442779360591, 'soft_opc': nan} step=2158




2022-04-20 18:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.49 [info     ] FQE_20220420180238: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016504885202430817, 'time_algorithm_update': 0.004601461341582149, 'loss': 0.004558180590687567, 'time_step': 0.004842532686440341, 'init_value': -1.0376447439193726, 'ave_value': -0.6057208073870824, 'soft_opc': nan} step=2324




2022-04-20 18:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.50 [info     ] FQE_20220420180238: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016434077756950655, 'time_algorithm_update': 0.004652857780456543, 'loss': 0.0047236831896624205, 'time_step': 0.004890549613768796, 'init_value': -1.0796868801116943, 'ave_value': -0.6291366907793122, 'soft_opc': nan} step=2490




2022-04-20 18:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.51 [info     ] FQE_20220420180238: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000162137560097568, 'time_algorithm_update': 0.004553086786385042, 'loss': 0.005039313330500196, 'time_step': 0.004790057618933988, 'init_value': -1.1311559677124023, 'ave_value': -0.6394562061473325, 'soft_opc': nan} step=2656




2022-04-20 18:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.52 [info     ] FQE_20220420180238: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015988263739160746, 'time_algorithm_update': 0.0038542819310383625, 'loss': 0.005098991525914879, 'time_step': 0.0040865475872913035, 'init_value': -1.1973308324813843, 'ave_value': -0.6803586817271, 'soft_opc': nan} step=2822




2022-04-20 18:02.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.53 [info     ] FQE_20220420180238: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016374616737825325, 'time_algorithm_update': 0.004582060388771884, 'loss': 0.005181126777610057, 'time_step': 0.0048207346215305555, 'init_value': -1.2318058013916016, 'ave_value': -0.6998216098258355, 'soft_opc': nan} step=2988




2022-04-20 18:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.54 [info     ] FQE_20220420180238: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016936193029564548, 'time_algorithm_update': 0.004536281149071383, 'loss': 0.005265333594372833, 'time_step': 0.004780212080622294, 'init_value': -1.3083609342575073, 'ave_value': -0.741993047927951, 'soft_opc': nan} step=3154




2022-04-20 18:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.54 [info     ] FQE_20220420180238: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016368297209222633, 'time_algorithm_update': 0.003946893186454314, 'loss': 0.0057509230240809185, 'time_step': 0.00418640332049634, 'init_value': -1.4336662292480469, 'ave_value': -0.8356747623264521, 'soft_opc': nan} step=3320




2022-04-20 18:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.55 [info     ] FQE_20220420180238: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016606572162674134, 'time_algorithm_update': 0.004600817898669875, 'loss': 0.005740347881492856, 'time_step': 0.00484583894890475, 'init_value': -1.438870906829834, 'ave_value': -0.8050898870604264, 'soft_opc': nan} step=3486




2022-04-20 18:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.56 [info     ] FQE_20220420180238: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001630739993359669, 'time_algorithm_update': 0.004589212946144931, 'loss': 0.006197361081294401, 'time_step': 0.0048285162592508705, 'init_value': -1.4996821880340576, 'ave_value': -0.8175038276626183, 'soft_opc': nan} step=3652




2022-04-20 18:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.57 [info     ] FQE_20220420180238: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016031064182878976, 'time_algorithm_update': 0.0045982915234852985, 'loss': 0.006688089940293008, 'time_step': 0.004832682839359145, 'init_value': -1.6082673072814941, 'ave_value': -0.8728043210063432, 'soft_opc': nan} step=3818




2022-04-20 18:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.58 [info     ] FQE_20220420180238: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001608233853995082, 'time_algorithm_update': 0.0038484363670808724, 'loss': 0.006943478083507304, 'time_step': 0.004087327474571136, 'init_value': -1.6763598918914795, 'ave_value': -0.9140058705566434, 'soft_opc': nan} step=3984




2022-04-20 18:02.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:02.59 [info     ] FQE_20220420180238: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016838814838823066, 'time_algorithm_update': 0.004582498447004571, 'loss': 0.006956600020246872, 'time_step': 0.0048268085502716435, 'init_value': -1.7926056385040283, 'ave_value': -1.0050482266825866, 'soft_opc': nan} step=4150




2022-04-20 18:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.00 [info     ] FQE_20220420180238: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015611677284700325, 'time_algorithm_update': 0.004521318228848009, 'loss': 0.007395158025312675, 'time_step': 0.004753820867423552, 'init_value': -1.8315385580062866, 'ave_value': -0.9886641527886864, 'soft_opc': nan} step=4316




2022-04-20 18:03.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.00 [info     ] FQE_20220420180238: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016810520585761014, 'time_algorithm_update': 0.003969583166650979, 'loss': 0.00803674471491656, 'time_step': 0.004211888255843197, 'init_value': -1.8998181819915771, 'ave_value': -1.0390812272424097, 'soft_opc': nan} step=4482




2022-04-20 18:03.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.01 [info     ] FQE_20220420180238: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016265030366828642, 'time_algorithm_update': 0.004735321883695671, 'loss': 0.008125280401057911, 'time_step': 0.004973711737667222, 'init_value': -1.9751964807510376, 'ave_value': -1.0595416136488722, 'soft_opc': nan} step=4648




2022-04-20 18:03.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.02 [info     ] FQE_20220420180238: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016249518796622036, 'time_algorithm_update': 0.004602100475724921, 'loss': 0.008683798479825169, 'time_step': 0.004836924104805452, 'init_value': -2.071342945098877, 'ave_value': -1.1232140807082525, 'soft_opc': nan} step=4814




2022-04-20 18:03.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.03 [info     ] FQE_20220420180238: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001644786581935653, 'time_algorithm_update': 0.00457672038710261, 'loss': 0.009216890907046912, 'time_step': 0.004819940371685718, 'init_value': -2.1092448234558105, 'ave_value': -1.1395872936860936, 'soft_opc': nan} step=4980




2022-04-20 18:03.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.04 [info     ] FQE_20220420180238: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001567444169377706, 'time_algorithm_update': 0.002868220030543316, 'loss': 0.008774696153950462, 'time_step': 0.003096715513482151, 'init_value': -2.2643322944641113, 'ave_value': -1.2800893576295527, 'soft_opc': nan} step=5146




2022-04-20 18:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.04 [info     ] FQE_20220420180238: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016324922262904155, 'time_algorithm_update': 0.0036745545375778013, 'loss': 0.009692819895649171, 'time_step': 0.00391348011522408, 'init_value': -2.3624348640441895, 'ave_value': -1.3528405913630048, 'soft_opc': nan} step=5312




2022-04-20 18:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.05 [info     ] FQE_20220420180238: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016046719378735646, 'time_algorithm_update': 0.0036460793161966713, 'loss': 0.010418033923011228, 'time_step': 0.003880882837686194, 'init_value': -2.370396614074707, 'ave_value': -1.3451853739919963, 'soft_opc': nan} step=5478




2022-04-20 18:03.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.06 [info     ] FQE_20220420180238: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016386537666780403, 'time_algorithm_update': 0.0036051646772637426, 'loss': 0.010971174334142223, 'time_step': 0.0038423868547002955, 'init_value': -2.465742588043213, 'ave_value': -1.3846844759341832, 'soft_opc': nan} step=5644




2022-04-20 18:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.06 [info     ] FQE_20220420180238: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016572389257959574, 'time_algorithm_update': 0.0035643721201333656, 'loss': 0.011248451902345946, 'time_step': 0.0038009278745536343, 'init_value': -2.48720645904541, 'ave_value': -1.4027585546846861, 'soft_opc': nan} step=5810




2022-04-20 18:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.07 [info     ] FQE_20220420180238: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015980938831007625, 'time_algorithm_update': 0.003668316875595644, 'loss': 0.011402488027626357, 'time_step': 0.003902412322630365, 'init_value': -2.567939043045044, 'ave_value': -1.441213175077159, 'soft_opc': nan} step=5976




2022-04-20 18:03.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.08 [info     ] FQE_20220420180238: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016378494630376976, 'time_algorithm_update': 0.0035503197865313792, 'loss': 0.012549993457238987, 'time_step': 0.0037876482469489776, 'init_value': -2.665861129760742, 'ave_value': -1.5001749867925773, 'soft_opc': nan} step=6142




2022-04-20 18:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.09 [info     ] FQE_20220420180238: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016421007822795087, 'time_algorithm_update': 0.00360257654304964, 'loss': 0.012278831277760756, 'time_step': 0.0038411028413887485, 'init_value': -2.70265531539917, 'ave_value': -1.5174946267258476, 'soft_opc': nan} step=6308




2022-04-20 18:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.09 [info     ] FQE_20220420180238: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016351636633815537, 'time_algorithm_update': 0.003535881099930729, 'loss': 0.013029077052452927, 'time_step': 0.003773057317159262, 'init_value': -2.770625352859497, 'ave_value': -1.5663020300502712, 'soft_opc': nan} step=6474




2022-04-20 18:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.10 [info     ] FQE_20220420180238: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016338423074009907, 'time_algorithm_update': 0.0035444110272878624, 'loss': 0.013580822376730707, 'time_step': 0.0037806263889174864, 'init_value': -2.8494150638580322, 'ave_value': -1.6382010540043985, 'soft_opc': nan} step=6640




2022-04-20 18:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.11 [info     ] FQE_20220420180238: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015931387981736516, 'time_algorithm_update': 0.0035230510206107632, 'loss': 0.01413396250313507, 'time_step': 0.003758068544318877, 'init_value': -2.9125118255615234, 'ave_value': -1.6775368703915192, 'soft_opc': nan} step=6806




2022-04-20 18:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.11 [info     ] FQE_20220420180238: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016303091164094857, 'time_algorithm_update': 0.0034628733094916285, 'loss': 0.014655160665770162, 'time_step': 0.003699291183287839, 'init_value': -2.9166224002838135, 'ave_value': -1.6567652873359284, 'soft_opc': nan} step=6972




2022-04-20 18:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.12 [info     ] FQE_20220420180238: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016143953943827065, 'time_algorithm_update': 0.003514094525072948, 'loss': 0.01508035381529368, 'time_step': 0.003744592149573636, 'init_value': -2.998112678527832, 'ave_value': -1.6903262629366673, 'soft_opc': nan} step=7138




2022-04-20 18:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.13 [info     ] FQE_20220420180238: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016031064182878976, 'time_algorithm_update': 0.0036191509430667005, 'loss': 0.015437734243880778, 'time_step': 0.003851723958210773, 'init_value': -2.9892609119415283, 'ave_value': -1.7160205966113387, 'soft_opc': nan} step=7304




2022-04-20 18:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.13 [info     ] FQE_20220420180238: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016475154692868152, 'time_algorithm_update': 0.003565624535801899, 'loss': 0.014218492552649275, 'time_step': 0.0038072474031563266, 'init_value': -3.0506949424743652, 'ave_value': -1.7249946858767453, 'soft_opc': nan} step=7470




2022-04-20 18:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.14 [info     ] FQE_20220420180238: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016196233680449337, 'time_algorithm_update': 0.003571093800556229, 'loss': 0.015569951612479895, 'time_step': 0.003808152244751712, 'init_value': -3.0383565425872803, 'ave_value': -1.7274340368538827, 'soft_opc': nan} step=7636




2022-04-20 18:03.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.15 [info     ] FQE_20220420180238: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001636413206537086, 'time_algorithm_update': 0.0035729580614940228, 'loss': 0.016027375554991324, 'time_step': 0.003810483289052205, 'init_value': -3.093775749206543, 'ave_value': -1.7668647076833892, 'soft_opc': nan} step=7802




2022-04-20 18:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.15 [info     ] FQE_20220420180238: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015854835510253906, 'time_algorithm_update': 0.0034694183303649166, 'loss': 0.016440516935958785, 'time_step': 0.0037018491561154284, 'init_value': -3.192660331726074, 'ave_value': -1.8696000309476444, 'soft_opc': nan} step=7968




2022-04-20 18:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.16 [info     ] FQE_20220420180238: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016358961541968655, 'time_algorithm_update': 0.0034447649875319146, 'loss': 0.0164054455593262, 'time_step': 0.00367978538375303, 'init_value': -3.299888849258423, 'ave_value': -1.9365565023779332, 'soft_opc': nan} step=8134




2022-04-20 18:03.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:03.17 [info     ] FQE_20220420180238: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016039394470582525, 'time_algorithm_update': 0.0034740287137318806, 'loss': 0.017005345298830105, 'time_step': 0.0037069320678710938, 'init_value': -3.266649007797241, 'ave_value': -1.9433361916794434, 'soft_opc': nan} step=8300




2022-04-20 18:03.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180238/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 18:03.17 [info     ] Directory is created at d3rlpy_logs/FQE_20220420180317
2022-04-20 18:03.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:03.17 [debug    ] Building models...
2022-04-20 18:03.17 [debug    ] Models have been built.
2022-04-20 18:03.17 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420180317/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:03.19 [info     ] FQE_20220420180317: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016663518062857695, 'time_algorithm_update': 0.0035148193669873613, 'loss': 0.026241647913445566, 'time_step': 0.0037576265113298283, 'init_value': -0.9564388394355774, 'ave_value': -0.9137922008295317, 'soft_opc': nan} step=344




2022-04-20 18:03.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.20 [info     ] FQE_20220420180317: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016320167585860852, 'time_algorithm_update': 0.003488016683001851, 'loss': 0.02358222576562142, 'time_step': 0.0037238660246826883, 'init_value': -1.6222152709960938, 'ave_value': -1.5764650435866536, 'soft_opc': nan} step=688




2022-04-20 18:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.21 [info     ] FQE_20220420180317: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016519981761311376, 'time_algorithm_update': 0.0034985015558642012, 'loss': 0.028929553904332394, 'time_step': 0.0037367995395216833, 'init_value': -2.4455347061157227, 'ave_value': -2.43424716099157, 'soft_opc': nan} step=1032




2022-04-20 18:03.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.23 [info     ] FQE_20220420180317: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016518803529961165, 'time_algorithm_update': 0.003544858028722364, 'loss': 0.03227912826019577, 'time_step': 0.0037883686464886333, 'init_value': -2.989307403564453, 'ave_value': -3.0230223709801294, 'soft_opc': nan} step=1376




2022-04-20 18:03.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.24 [info     ] FQE_20220420180317: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001685342123342115, 'time_algorithm_update': 0.0035000422666239183, 'loss': 0.03933200055765811, 'time_step': 0.003742213166037271, 'init_value': -3.7639358043670654, 'ave_value': -3.833679048663324, 'soft_opc': nan} step=1720




2022-04-20 18:03.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.26 [info     ] FQE_20220420180317: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001714430576147035, 'time_algorithm_update': 0.0035323050133017606, 'loss': 0.04682602958082287, 'time_step': 0.003782589075177215, 'init_value': -4.173238754272461, 'ave_value': -4.277466513458136, 'soft_opc': nan} step=2064




2022-04-20 18:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.27 [info     ] FQE_20220420180317: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016377554383388784, 'time_algorithm_update': 0.00353583624196607, 'loss': 0.057507529662553826, 'time_step': 0.003775168296902679, 'init_value': -4.848013401031494, 'ave_value': -4.996045723517199, 'soft_opc': nan} step=2408




2022-04-20 18:03.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.29 [info     ] FQE_20220420180317: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016901867334232775, 'time_algorithm_update': 0.0036285727523094, 'loss': 0.07056133443392207, 'time_step': 0.0038733759591745775, 'init_value': -5.226376056671143, 'ave_value': -5.377941618195257, 'soft_opc': nan} step=2752




2022-04-20 18:03.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.30 [info     ] FQE_20220420180317: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016738578330638797, 'time_algorithm_update': 0.0035556949848352475, 'loss': 0.08281439871169886, 'time_step': 0.0037977217241775157, 'init_value': -5.646529197692871, 'ave_value': -5.812454881029086, 'soft_opc': nan} step=3096




2022-04-20 18:03.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.31 [info     ] FQE_20220420180317: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016657280367474224, 'time_algorithm_update': 0.0035405256027399106, 'loss': 0.10286889738825605, 'time_step': 0.003782151050345842, 'init_value': -6.20967960357666, 'ave_value': -6.4454901179080615, 'soft_opc': nan} step=3440




2022-04-20 18:03.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.33 [info     ] FQE_20220420180317: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016911362492760948, 'time_algorithm_update': 0.0035369846709938937, 'loss': 0.12137869573577285, 'time_step': 0.003781351932259493, 'init_value': -6.5898356437683105, 'ave_value': -6.759666767521753, 'soft_opc': nan} step=3784




2022-04-20 18:03.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.34 [info     ] FQE_20220420180317: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017102235971495162, 'time_algorithm_update': 0.003530538359353709, 'loss': 0.14383282922316604, 'time_step': 0.0037766986115034236, 'init_value': -7.082664489746094, 'ave_value': -7.341697031548163, 'soft_opc': nan} step=4128




2022-04-20 18:03.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.36 [info     ] FQE_20220420180317: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016657211059747741, 'time_algorithm_update': 0.0035183332687200503, 'loss': 0.1651643926580978, 'time_step': 0.003758264835490737, 'init_value': -7.413859844207764, 'ave_value': -7.719320995625746, 'soft_opc': nan} step=4472




2022-04-20 18:03.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.37 [info     ] FQE_20220420180317: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016782519429229026, 'time_algorithm_update': 0.0035491440185280733, 'loss': 0.19137869645884736, 'time_step': 0.0037905864937360896, 'init_value': -7.818493843078613, 'ave_value': -8.19934034821334, 'soft_opc': nan} step=4816




2022-04-20 18:03.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.38 [info     ] FQE_20220420180317: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016719726629035417, 'time_algorithm_update': 0.003512713105179543, 'loss': 0.21950058558371005, 'time_step': 0.0037561086721198504, 'init_value': -7.854238510131836, 'ave_value': -8.278497936277072, 'soft_opc': nan} step=5160




2022-04-20 18:03.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.40 [info     ] FQE_20220420180317: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016996888227240983, 'time_algorithm_update': 0.0034970100535902868, 'loss': 0.25199640873104856, 'time_step': 0.003741733556570009, 'init_value': -8.247336387634277, 'ave_value': -8.812040283041796, 'soft_opc': nan} step=5504




2022-04-20 18:03.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.41 [info     ] FQE_20220420180317: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016567596169405206, 'time_algorithm_update': 0.002855663382729819, 'loss': 0.28048169219238295, 'time_step': 0.0030973324942034346, 'init_value': -8.288050651550293, 'ave_value': -8.996956203247814, 'soft_opc': nan} step=5848




2022-04-20 18:03.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.42 [info     ] FQE_20220420180317: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001701962116152741, 'time_algorithm_update': 0.003140671308650527, 'loss': 0.31597319622199205, 'time_step': 0.0033860622450362803, 'init_value': -8.743377685546875, 'ave_value': -9.603477777118652, 'soft_opc': nan} step=6192




2022-04-20 18:03.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.44 [info     ] FQE_20220420180317: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016312335812768272, 'time_algorithm_update': 0.0034704305404840512, 'loss': 0.34720806858697256, 'time_step': 0.00370933496674826, 'init_value': -8.66403579711914, 'ave_value': -9.687918013334274, 'soft_opc': nan} step=6536




2022-04-20 18:03.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.45 [info     ] FQE_20220420180317: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001710611720417821, 'time_algorithm_update': 0.0035324110541232797, 'loss': 0.3716540524443655, 'time_step': 0.0037791347780893017, 'init_value': -8.627737045288086, 'ave_value': -9.83759106731982, 'soft_opc': nan} step=6880




2022-04-20 18:03.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.47 [info     ] FQE_20220420180317: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001674440017966337, 'time_algorithm_update': 0.003524807996528093, 'loss': 0.40065524247366674, 'time_step': 0.003770129625187364, 'init_value': -8.710384368896484, 'ave_value': -10.236062398871306, 'soft_opc': nan} step=7224




2022-04-20 18:03.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.48 [info     ] FQE_20220420180317: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016791945280030717, 'time_algorithm_update': 0.00348204305005628, 'loss': 0.43216135232102904, 'time_step': 0.0037270389324010806, 'init_value': -8.79132080078125, 'ave_value': -10.578329518556528, 'soft_opc': nan} step=7568




2022-04-20 18:03.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.49 [info     ] FQE_20220420180317: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001680698505667753, 'time_algorithm_update': 0.003499509983284529, 'loss': 0.46648448870820536, 'time_step': 0.0037433179311974103, 'init_value': -9.071756362915039, 'ave_value': -11.014431435905129, 'soft_opc': nan} step=7912




2022-04-20 18:03.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.51 [info     ] FQE_20220420180317: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016603636187176372, 'time_algorithm_update': 0.0035242805647295577, 'loss': 0.5036285156438257, 'time_step': 0.0037666649319404778, 'init_value': -9.032835006713867, 'ave_value': -11.25089150552747, 'soft_opc': nan} step=8256




2022-04-20 18:03.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.52 [info     ] FQE_20220420180317: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001665970613790113, 'time_algorithm_update': 0.003561814164006433, 'loss': 0.5283309568077066, 'time_step': 0.0038021941517674645, 'init_value': -9.448902130126953, 'ave_value': -11.882617715622407, 'soft_opc': nan} step=8600




2022-04-20 18:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.54 [info     ] FQE_20220420180317: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001698441283647404, 'time_algorithm_update': 0.003543252861777017, 'loss': 0.5630907225043534, 'time_step': 0.0037891324176344762, 'init_value': -9.247986793518066, 'ave_value': -11.972977399193002, 'soft_opc': nan} step=8944




2022-04-20 18:03.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.55 [info     ] FQE_20220420180317: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001688523347987685, 'time_algorithm_update': 0.003604035737902619, 'loss': 0.5771290027688063, 'time_step': 0.003847996855891028, 'init_value': -9.360405921936035, 'ave_value': -12.266122828626182, 'soft_opc': nan} step=9288




2022-04-20 18:03.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.56 [info     ] FQE_20220420180317: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001716773177302161, 'time_algorithm_update': 0.003574265297069106, 'loss': 0.6060294259426205, 'time_step': 0.0038218685360841974, 'init_value': -9.5706787109375, 'ave_value': -12.734029375704694, 'soft_opc': nan} step=9632




2022-04-20 18:03.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.58 [info     ] FQE_20220420180317: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001655595247135606, 'time_algorithm_update': 0.0035579800605773926, 'loss': 0.6297645683476139, 'time_step': 0.003797320432441179, 'init_value': -9.777433395385742, 'ave_value': -13.161315938208656, 'soft_opc': nan} step=9976




2022-04-20 18:03.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:03.59 [info     ] FQE_20220420180317: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016947471818258596, 'time_algorithm_update': 0.003588309814763624, 'loss': 0.6485926167292217, 'time_step': 0.0038365749425666278, 'init_value': -9.640209197998047, 'ave_value': -13.138229328605371, 'soft_opc': nan} step=10320




2022-04-20 18:03.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.01 [info     ] FQE_20220420180317: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016905609951462855, 'time_algorithm_update': 0.0035658735175465427, 'loss': 0.6619963233283352, 'time_step': 0.0038114300993985907, 'init_value': -9.815343856811523, 'ave_value': -13.592017917100105, 'soft_opc': nan} step=10664




2022-04-20 18:04.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.02 [info     ] FQE_20220420180317: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016933679580688477, 'time_algorithm_update': 0.0035442765368971715, 'loss': 0.6849400885294863, 'time_step': 0.003788956376009209, 'init_value': -9.684733390808105, 'ave_value': -13.729634002862111, 'soft_opc': nan} step=11008




2022-04-20 18:04.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.04 [info     ] FQE_20220420180317: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001678903435551843, 'time_algorithm_update': 0.0035251975059509277, 'loss': 0.7101853852008664, 'time_step': 0.003771997468416081, 'init_value': -9.844709396362305, 'ave_value': -14.11578921974796, 'soft_opc': nan} step=11352




2022-04-20 18:04.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.05 [info     ] FQE_20220420180317: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016884609710338504, 'time_algorithm_update': 0.0035538507062335347, 'loss': 0.7208496473491365, 'time_step': 0.0038006943325663723, 'init_value': -9.39062213897705, 'ave_value': -13.771928516406197, 'soft_opc': nan} step=11696




2022-04-20 18:04.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.06 [info     ] FQE_20220420180317: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016721112783565077, 'time_algorithm_update': 0.0035598790922830273, 'loss': 0.749783142475238, 'time_step': 0.0038044515044190165, 'init_value': -9.80573558807373, 'ave_value': -14.21126519874376, 'soft_opc': nan} step=12040




2022-04-20 18:04.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.08 [info     ] FQE_20220420180317: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016872342242751012, 'time_algorithm_update': 0.003523976303810297, 'loss': 0.7620296446054221, 'time_step': 0.0037700027920478997, 'init_value': -9.662766456604004, 'ave_value': -14.236030337207758, 'soft_opc': nan} step=12384




2022-04-20 18:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.09 [info     ] FQE_20220420180317: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017034037168635877, 'time_algorithm_update': 0.0035797510036202364, 'loss': 0.7677403717128517, 'time_step': 0.003824353911155878, 'init_value': -9.93189811706543, 'ave_value': -14.65497549224712, 'soft_opc': nan} step=12728




2022-04-20 18:04.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.11 [info     ] FQE_20220420180317: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001706813657006552, 'time_algorithm_update': 0.003587435151255408, 'loss': 0.7796314187932673, 'time_step': 0.0038350425487340884, 'init_value': -9.998326301574707, 'ave_value': -14.80018417460609, 'soft_opc': nan} step=13072




2022-04-20 18:04.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.12 [info     ] FQE_20220420180317: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016850579616635344, 'time_algorithm_update': 0.0035724688407986665, 'loss': 0.8015452880780537, 'time_step': 0.003816998975221501, 'init_value': -10.140121459960938, 'ave_value': -14.891060388348325, 'soft_opc': nan} step=13416




2022-04-20 18:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.14 [info     ] FQE_20220420180317: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017200722250827524, 'time_algorithm_update': 0.003526380588841993, 'loss': 0.8188084719789236, 'time_step': 0.003775570974793545, 'init_value': -10.169656753540039, 'ave_value': -15.070717856409615, 'soft_opc': nan} step=13760




2022-04-20 18:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.15 [info     ] FQE_20220420180317: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016755004261815272, 'time_algorithm_update': 0.00355399209399556, 'loss': 0.8297408906049853, 'time_step': 0.003797039736148923, 'init_value': -10.438636779785156, 'ave_value': -15.316935440517023, 'soft_opc': nan} step=14104




2022-04-20 18:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.16 [info     ] FQE_20220420180317: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016979214756987815, 'time_algorithm_update': 0.003567161948181862, 'loss': 0.8330904306562315, 'time_step': 0.0038135044796522273, 'init_value': -10.574161529541016, 'ave_value': -15.45014674629563, 'soft_opc': nan} step=14448




2022-04-20 18:04.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.18 [info     ] FQE_20220420180317: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001654077407925628, 'time_algorithm_update': 0.0036057635795238405, 'loss': 0.8397942237707592, 'time_step': 0.003846912189971569, 'init_value': -10.651097297668457, 'ave_value': -15.63267331642879, 'soft_opc': nan} step=14792




2022-04-20 18:04.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.19 [info     ] FQE_20220420180317: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016985313836918322, 'time_algorithm_update': 0.003619232843088549, 'loss': 0.8547179865553368, 'time_step': 0.003867312226184579, 'init_value': -10.2747802734375, 'ave_value': -15.28810567007744, 'soft_opc': nan} step=15136




2022-04-20 18:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.21 [info     ] FQE_20220420180317: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016754657723182855, 'time_algorithm_update': 0.003604317820349405, 'loss': 0.8669621775489907, 'time_step': 0.0038492353849632795, 'init_value': -10.25668716430664, 'ave_value': -15.399868671205063, 'soft_opc': nan} step=15480




2022-04-20 18:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.22 [info     ] FQE_20220420180317: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017047552175300065, 'time_algorithm_update': 0.0036307982234067694, 'loss': 0.8653684619186056, 'time_step': 0.003877722939779592, 'init_value': -10.416030883789062, 'ave_value': -15.522175411664385, 'soft_opc': nan} step=15824




2022-04-20 18:04.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.24 [info     ] FQE_20220420180317: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016849124154379202, 'time_algorithm_update': 0.003560504941053169, 'loss': 0.868478957980568, 'time_step': 0.003806999256444532, 'init_value': -10.77906608581543, 'ave_value': -16.061803334677876, 'soft_opc': nan} step=16168




2022-04-20 18:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.25 [info     ] FQE_20220420180317: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016844827075337255, 'time_algorithm_update': 0.0035862215729646906, 'loss': 0.8585294333264925, 'time_step': 0.0038318807302519333, 'init_value': -10.565271377563477, 'ave_value': -15.937449895454622, 'soft_opc': nan} step=16512




2022-04-20 18:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.26 [info     ] FQE_20220420180317: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016754241876823958, 'time_algorithm_update': 0.003557854613592458, 'loss': 0.8701206799271668, 'time_step': 0.0038004649239917134, 'init_value': -10.92175579071045, 'ave_value': -16.1952546598524, 'soft_opc': nan} step=16856




2022-04-20 18:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:04.28 [info     ] FQE_20220420180317: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001665471598159435, 'time_algorithm_update': 0.003588630016459975, 'loss': 0.8676322942144822, 'time_step': 0.0038338858027790867, 'init_value': -10.88469409942627, 'ave_value': -16.147504291780763, 'soft_opc': nan} step=17200




2022-04-20 18:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420180317/model_17200.pt
search iteration:  17
using hyper params:  [0.005415143038231499, 0.00218197500256875, 7.183412691667276e-05, 1]
2022-04-20 18:04.28 [debug    ] RoundIterator is selected.
2022-04-20 18:04.28 [info     ] Directory is created at d3rlpy_logs/CQL_20220420180428
2022-04-20 18:04.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:04.28 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:04.28 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420180428/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.005415143038231499, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'wei

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:04.37 [info     ] CQL_20220420180428: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030899117564597324, 'time_algorithm_update': 0.024641918857195223, 'temp_loss': 4.74214985670402, 'temp': 0.987900894287734, 'alpha_loss': -14.728902761002033, 'alpha': 1.0165567771036026, 'critic_loss': 19.22937053546571, 'actor_loss': -1.5937152530191934, 'time_step': 0.02505398005769964, 'td_error': 3.1385274805953745, 'init_value': -0.5041046142578125, 'ave_value': 0.6204075233869859} step=342
2022-04-20 18:04.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:04.46 [info     ] CQL_20220420180428: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00031680042980707177, 'time_algorithm_update': 0.024778987232007478, 'temp_loss': 4.121818037758096, 'temp': 0.9652154482247537, 'alpha_loss': -8.693311560223675, 'alpha': 1.044555550081688, 'critic_loss': 19.887761774118882, 'actor_loss': -0.6294869404253468, 'time_step': 0.025199409116778457, 'td_error': 4.831928542340197, 'init_value': -2.874692440032959, 'ave_value': 0.634500472825524} step=684
2022-04-20 18:04.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:04.55 [info     ] CQL_20220420180428: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00031304568575139634, 'time_algorithm_update': 0.024874644669872974, 'temp_loss': 3.380417113415679, 'temp': 0.9449753072875285, 'alpha_loss': -4.4894582144698205, 'alpha': 1.0647014881435193, 'critic_loss': 33.31582326498645, 'actor_loss': 0.6331252320097727, 'time_step': 0.025290426455046002, 'td_error': 3.957350708152074, 'init_value': -3.891340970993042, 'ave_value': 0.543649072984049} step=1026
2022-04-20 18:04.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.04 [info     ] CQL_20220420180428: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003141366947464078, 'time_algorithm_update': 0.024742583782352202, 'temp_loss': 2.734707354105007, 'temp': 0.9273273305237641, 'alpha_loss': -1.3733950277616145, 'alpha': 1.0761461062738074, 'critic_loss': 51.02305736876371, 'actor_loss': 1.5358704376900405, 'time_step': 0.025157276649921262, 'td_error': 4.694574302957958, 'init_value': -5.994006156921387, 'ave_value': 0.14795364770534877} step=1368
2022-04-20 18:05.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.13 [info     ] CQL_20220420180428: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00031635775203593295, 'time_algorithm_update': 0.02605202811503271, 'temp_loss': 2.260786894120668, 'temp': 0.9113612884317922, 'alpha_loss': 1.1585017667405242, 'alpha': 1.0763611748204593, 'critic_loss': 70.3977405592712, 'actor_loss': 2.382838851352881, 'time_step': 0.026470587267513165, 'td_error': 5.9699855399320345, 'init_value': -7.689083099365234, 'ave_value': 0.2058224776199272} step=1710
2022-04-20 18:05.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.23 [info     ] CQL_20220420180428: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003185558040239658, 'time_algorithm_update': 0.025932569949947602, 'temp_loss': 1.924283144069694, 'temp': 0.8965499150125604, 'alpha_loss': 3.2168265534074685, 'alpha': 1.0626141554430912, 'critic_loss': 91.18980057476557, 'actor_loss': 3.3506192211170642, 'time_step': 0.026348670323689777, 'td_error': 7.543282598073726, 'init_value': -10.259560585021973, 'ave_value': -0.25401763600003613} step=2052
2022-04-20 18:05.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.32 [info     ] CQL_20220420180428: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00031129031153450233, 'time_algorithm_update': 0.026057900741086368, 'temp_loss': 1.6350465483135648, 'temp': 0.8827272125503474, 'alpha_loss': 4.810935482295633, 'alpha': 1.0346887160462943, 'critic_loss': 113.35260505007024, 'actor_loss': 4.356961105302063, 'time_step': 0.02646793468653807, 'td_error': 8.441308720183674, 'init_value': -12.570202827453613, 'ave_value': -1.207700468860231} step=2394
2022-04-20 18:05.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.42 [info     ] CQL_20220420180428: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00031477735753644975, 'time_algorithm_update': 0.025752128216258267, 'temp_loss': 1.3688237023980994, 'temp': 0.8695671342269718, 'alpha_loss': 5.901694635898746, 'alpha': 0.9987377567946563, 'critic_loss': 135.1241806877984, 'actor_loss': 5.286324479426557, 'time_step': 0.026166454393264146, 'td_error': 9.742305918772576, 'init_value': -14.685861587524414, 'ave_value': -1.9826158596642383} step=2736
2022-04-20 18:05.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:05.51 [info     ] CQL_20220420180428: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003113321393553974, 'time_algorithm_update': 0.02590208931973106, 'temp_loss': 1.1626748775529583, 'temp': 0.8574605930966941, 'alpha_loss': 6.74395033281449, 'alpha': 0.9597083762375235, 'critic_loss': 156.03713306627776, 'actor_loss': 6.257734624265927, 'time_step': 0.026309079594082303, 'td_error': 11.430512664493115, 'init_value': -16.999826431274414, 'ave_value': -2.056503989073607} step=3078
2022-04-20 18:05.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.00 [info     ] CQL_20220420180428: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00030154373213561655, 'time_algorithm_update': 0.02477321778124536, 'temp_loss': 1.0101799337487471, 'temp': 0.8458482314620102, 'alpha_loss': 7.351914693040458, 'alpha': 0.9212196720273871, 'critic_loss': 178.35118638423452, 'actor_loss': 7.32133050062503, 'time_step': 0.0251682968864664, 'td_error': 11.922667524933699, 'init_value': -20.718700408935547, 'ave_value': -4.008441322596224} step=3420
2022-04-20 18:06.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.09 [info     ] CQL_20220420180428: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00030044644896747073, 'time_algorithm_update': 0.024984437122679594, 'temp_loss': 0.8656165481310839, 'temp': 0.8346435621119382, 'alpha_loss': 7.8563507523453024, 'alpha': 0.8844882457228432, 'critic_loss': 201.23883882042958, 'actor_loss': 8.422230215797647, 'time_step': 0.025382610092386168, 'td_error': 13.172887802242355, 'init_value': -23.096914291381836, 'ave_value': -4.1614537933280875} step=3762
2022-04-20 18:06.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.18 [info     ] CQL_20220420180428: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00028950150249994287, 'time_algorithm_update': 0.024042650970102053, 'temp_loss': 0.7283350626627604, 'temp': 0.8240386491630509, 'alpha_loss': 8.199483214763173, 'alpha': 0.8500025676704986, 'critic_loss': 225.07887196680258, 'actor_loss': 9.596639286007797, 'time_step': 0.02442304711592825, 'td_error': 14.396745973004407, 'init_value': -24.95840072631836, 'ave_value': -4.7112621741037115} step=4104
2022-04-20 18:06.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.27 [info     ] CQL_20220420180428: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003149342118648061, 'time_algorithm_update': 0.025762558680528787, 'temp_loss': 0.5984218869554369, 'temp': 0.8139182085182235, 'alpha_loss': 8.515303491848952, 'alpha': 0.8176832226981894, 'critic_loss': 248.7213780364098, 'actor_loss': 10.743713063803332, 'time_step': 0.026174872939349614, 'td_error': 17.69486311694344, 'init_value': -26.705724716186523, 'ave_value': -5.560333332161646} step=4446
2022-04-20 18:06.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.37 [info     ] CQL_20220420180428: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00030976220181113794, 'time_algorithm_update': 0.025346230345162733, 'temp_loss': 0.49969609995639463, 'temp': 0.8047580572596768, 'alpha_loss': 8.893080788049085, 'alpha': 0.786748897262484, 'critic_loss': 274.00014428925095, 'actor_loss': 12.052660946260419, 'time_step': 0.02575367235997964, 'td_error': 20.092884970489006, 'init_value': -30.073516845703125, 'ave_value': -6.627280375796396} step=4788
2022-04-20 18:06.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.46 [info     ] CQL_20220420180428: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00031695310135333856, 'time_algorithm_update': 0.02567672102074874, 'temp_loss': 0.36912688879808137, 'temp': 0.7964938786643291, 'alpha_loss': 9.153974593034265, 'alpha': 0.7573521386461648, 'critic_loss': 300.83162354586415, 'actor_loss': 13.31623193395068, 'time_step': 0.026092219771000378, 'td_error': 24.27106520676686, 'init_value': -33.36670684814453, 'ave_value': -8.154834873096362} step=5130
2022-04-20 18:06.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:06.55 [info     ] CQL_20220420180428: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003099866777832745, 'time_algorithm_update': 0.025623468627706605, 'temp_loss': 0.27210751049525556, 'temp': 0.7895519356978568, 'alpha_loss': 9.236988377152828, 'alpha': 0.7297469581428327, 'critic_loss': 327.21927736516585, 'actor_loss': 14.652610003599646, 'time_step': 0.026032674382304586, 'td_error': 25.372559609691965, 'init_value': -36.35108184814453, 'ave_value': -8.99167256447646} step=5472
2022-04-20 18:06.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.05 [info     ] CQL_20220420180428: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00030995670117829973, 'time_algorithm_update': 0.025583470076845402, 'temp_loss': 0.18487930430742036, 'temp': 0.7840790283261684, 'alpha_loss': 9.303916076470538, 'alpha': 0.7035602094137181, 'critic_loss': 354.92058692619815, 'actor_loss': 15.95317903039051, 'time_step': 0.02599241092191105, 'td_error': 28.09767128457072, 'init_value': -38.53649139404297, 'ave_value': -10.618689807771563} step=5814
2022-04-20 18:07.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.14 [info     ] CQL_20220420180428: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00031033175730565835, 'time_algorithm_update': 0.02580722661046257, 'temp_loss': 0.1243861259086526, 'temp': 0.7796534298107638, 'alpha_loss': 9.378172662523058, 'alpha': 0.6789032890085589, 'critic_loss': 384.47808052643, 'actor_loss': 17.479240607099925, 'time_step': 0.026215872569390904, 'td_error': 29.993129750207796, 'init_value': -41.300880432128906, 'ave_value': -11.058997343971923} step=6156
2022-04-20 18:07.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.23 [info     ] CQL_20220420180428: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003150032277692828, 'time_algorithm_update': 0.025485146115397848, 'temp_loss': 0.061522104178299335, 'temp': 0.7765416163101531, 'alpha_loss': 9.555261147649665, 'alpha': 0.6550639111396165, 'critic_loss': 415.2376964189853, 'actor_loss': 19.01506719533463, 'time_step': 0.025897014210795797, 'td_error': 37.18181531936909, 'init_value': -44.00154113769531, 'ave_value': -12.787883439944672} step=6498
2022-04-20 18:07.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.33 [info     ] CQL_20220420180428: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00030775307214748093, 'time_algorithm_update': 0.02573398679320575, 'temp_loss': 0.01588075430464675, 'temp': 0.7753943190937154, 'alpha_loss': 9.699361955910398, 'alpha': 0.6321426060116082, 'critic_loss': 446.1943320112619, 'actor_loss': 20.57484191202978, 'time_step': 0.026137777239258527, 'td_error': 38.14451912331393, 'init_value': -48.37389373779297, 'ave_value': -15.688670524949426} step=6840
2022-04-20 18:07.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.42 [info     ] CQL_20220420180428: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00030280484093560115, 'time_algorithm_update': 0.025315054675988984, 'temp_loss': -0.045228551926189346, 'temp': 0.7757918318809821, 'alpha_loss': 9.563041235271253, 'alpha': 0.6102584340767554, 'critic_loss': 480.21672709504065, 'actor_loss': 22.21869833706415, 'time_step': 0.025716284562272634, 'td_error': 38.44564382137725, 'init_value': -50.653629302978516, 'ave_value': -15.31203346741092} step=7182
2022-04-20 18:07.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:07.51 [info     ] CQL_20220420180428: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003065372768201326, 'time_algorithm_update': 0.025770597987704806, 'temp_loss': -0.07038570012439761, 'temp': 0.7785675229384885, 'alpha_loss': 9.419854716250772, 'alpha': 0.5897948165037479, 'critic_loss': 512.7004679183514, 'actor_loss': 23.7922105064169, 'time_step': 0.0261766903581675, 'td_error': 42.006818867497216, 'init_value': -54.524635314941406, 'ave_value': -17.19872032125791} step=7524
2022-04-20 18:07.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.01 [info     ] CQL_20220420180428: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00030923517126786083, 'time_algorithm_update': 0.025528888953359502, 'temp_loss': -0.062264112701192934, 'temp': 0.7816835120756026, 'alpha_loss': 9.48651455299199, 'alpha': 0.5700751983979989, 'critic_loss': 546.4569954676934, 'actor_loss': 25.56790338761625, 'time_step': 0.025935696579559506, 'td_error': 40.335895296918416, 'init_value': -57.16828536987305, 'ave_value': -18.175840949582625} step=7866
2022-04-20 18:08.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.10 [info     ] CQL_20220420180428: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00031473413545485827, 'time_algorithm_update': 0.02509642065617076, 'temp_loss': -0.08418423820243419, 'temp': 0.7858655857063873, 'alpha_loss': 9.572402375483373, 'alpha': 0.5508023793237251, 'critic_loss': 582.6386419820507, 'actor_loss': 27.355540264419645, 'time_step': 0.025509769456428393, 'td_error': 59.508023470929544, 'init_value': -61.520423889160156, 'ave_value': -20.485271076280792} step=8208
2022-04-20 18:08.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.18 [info     ] CQL_20220420180428: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00030626400172361853, 'time_algorithm_update': 0.02390285402710675, 'temp_loss': -0.09759319886260213, 'temp': 0.7920182995628893, 'alpha_loss': 9.335805029896965, 'alpha': 0.5324124869895958, 'critic_loss': 619.617882265682, 'actor_loss': 29.180508591278254, 'time_step': 0.024306089557402315, 'td_error': 54.10353607630687, 'init_value': -64.23194122314453, 'ave_value': -20.700133746250255} step=8550
2022-04-20 18:08.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.27 [info     ] CQL_20220420180428: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00030858544578329163, 'time_algorithm_update': 0.02404017824875681, 'temp_loss': -0.1073219460211912, 'temp': 0.7993878631563912, 'alpha_loss': 9.45345757997524, 'alpha': 0.5147926141644081, 'critic_loss': 657.1843597233644, 'actor_loss': 31.01148780465823, 'time_step': 0.024441961656536972, 'td_error': 65.06749563372526, 'init_value': -68.40438079833984, 'ave_value': -22.96492277160421} step=8892
2022-04-20 18:08.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.36 [info     ] CQL_20220420180428: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003103115405255591, 'time_algorithm_update': 0.023945559535110204, 'temp_loss': -0.11105994920502281, 'temp': 0.806473175684611, 'alpha_loss': 9.49847676321777, 'alpha': 0.4976322612567255, 'critic_loss': 696.4458421852156, 'actor_loss': 33.003822538587784, 'time_step': 0.024355303474337037, 'td_error': 82.39865506196476, 'init_value': -75.399169921875, 'ave_value': -26.869319605483664} step=9234
2022-04-20 18:08.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.45 [info     ] CQL_20220420180428: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00030711031796639424, 'time_algorithm_update': 0.02384462203198706, 'temp_loss': -0.13904466268699073, 'temp': 0.8145576311133759, 'alpha_loss': 9.610399492999964, 'alpha': 0.48076568308629486, 'critic_loss': 739.3287105448762, 'actor_loss': 35.194177192554136, 'time_step': 0.024249886211596038, 'td_error': 85.83205484580536, 'init_value': -76.07539367675781, 'ave_value': -26.02179446754155} step=9576
2022-04-20 18:08.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:08.53 [info     ] CQL_20220420180428: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003007204211943331, 'time_algorithm_update': 0.024028050272088302, 'temp_loss': -0.07170220055993189, 'temp': 0.8223545683754815, 'alpha_loss': 9.012985980998703, 'alpha': 0.46502383053302765, 'critic_loss': 783.8983891358849, 'actor_loss': 37.285984502201195, 'time_step': 0.02442393386573122, 'td_error': 92.31626714949314, 'init_value': -82.4333267211914, 'ave_value': -29.209169706643166} step=9918
2022-04-20 18:08.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.02 [info     ] CQL_20220420180428: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003053995600917883, 'time_algorithm_update': 0.023934512110481487, 'temp_loss': -0.12249502439422218, 'temp': 0.8306363418436887, 'alpha_loss': 9.297163248062134, 'alpha': 0.44986070387544685, 'critic_loss': 825.1433096545481, 'actor_loss': 39.36904708126135, 'time_step': 0.024340137403610854, 'td_error': 96.36304910701783, 'init_value': -87.07843017578125, 'ave_value': -31.326611981391906} step=10260
2022-04-20 18:09.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.11 [info     ] CQL_20220420180428: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003084948188380191, 'time_algorithm_update': 0.02415742790489866, 'temp_loss': -0.10619023749441431, 'temp': 0.8381843171273059, 'alpha_loss': 9.115398291258783, 'alpha': 0.4350194802061159, 'critic_loss': 869.8221123232479, 'actor_loss': 41.5381475136294, 'time_step': 0.024565011437176265, 'td_error': 108.13991822603515, 'init_value': -91.67588806152344, 'ave_value': -32.989727386826864} step=10602
2022-04-20 18:09.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.20 [info     ] CQL_20220420180428: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00031173159504494473, 'time_algorithm_update': 0.024279411773235476, 'temp_loss': -0.10514453999502094, 'temp': 0.84491208509395, 'alpha_loss': 8.822866734008343, 'alpha': 0.42118425580144625, 'critic_loss': 918.2194624337536, 'actor_loss': 43.904843943858005, 'time_step': 0.024690080804434435, 'td_error': 120.9483926354291, 'init_value': -95.92898559570312, 'ave_value': -35.33126463048093} step=10944
2022-04-20 18:09.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.28 [info     ] CQL_20220420180428: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00031064198031062967, 'time_algorithm_update': 0.02410896200882761, 'temp_loss': -0.10996131425756112, 'temp': 0.8534100465258659, 'alpha_loss': 9.027320723784598, 'alpha': 0.40737324900794447, 'critic_loss': 964.775219476711, 'actor_loss': 46.08009221818712, 'time_step': 0.02451964219411214, 'td_error': 124.40621165143682, 'init_value': -102.8924789428711, 'ave_value': -37.258296017045375} step=11286
2022-04-20 18:09.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.37 [info     ] CQL_20220420180428: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00031198604762205603, 'time_algorithm_update': 0.024261581270318282, 'temp_loss': -0.13422809946432449, 'temp': 0.8621878263197447, 'alpha_loss': 8.920215025282742, 'alpha': 0.39406343963411117, 'critic_loss': 1009.3210402817754, 'actor_loss': 48.30886544121636, 'time_step': 0.024671162081043623, 'td_error': 141.57340807481762, 'init_value': -108.38374328613281, 'ave_value': -39.760517225233286} step=11628
2022-04-20 18:09.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.46 [info     ] CQL_20220420180428: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.000309057403029057, 'time_algorithm_update': 0.02418822363803261, 'temp_loss': -0.07176916186332877, 'temp': 0.870463122052756, 'alpha_loss': 8.355039450160245, 'alpha': 0.38136825305328037, 'critic_loss': 1055.5829107273391, 'actor_loss': 50.45288808722245, 'time_step': 0.024597952240391782, 'td_error': 133.58060614382308, 'init_value': -112.31672668457031, 'ave_value': -42.06031490269008} step=11970
2022-04-20 18:09.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:09.55 [info     ] CQL_20220420180428: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003084167402390151, 'time_algorithm_update': 0.024012891172665602, 'temp_loss': -0.1090104497468088, 'temp': 0.8762063119146559, 'alpha_loss': 8.357633870944642, 'alpha': 0.3695180827414083, 'critic_loss': 1096.636014347188, 'actor_loss': 52.43237035874038, 'time_step': 0.02441802986881189, 'td_error': 147.92796409893185, 'init_value': -117.5121078491211, 'ave_value': -43.607166529915354} step=12312
2022-04-20 18:09.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.04 [info     ] CQL_20220420180428: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003099253303126285, 'time_algorithm_update': 0.024320193898608112, 'temp_loss': -0.07137283841311409, 'temp': 0.8846470459162841, 'alpha_loss': 8.315647394336455, 'alpha': 0.35763659367435857, 'critic_loss': 1141.3465808176854, 'actor_loss': 54.66330488104569, 'time_step': 0.024726362953409117, 'td_error': 140.7500030429161, 'init_value': -122.47003173828125, 'ave_value': -46.107720490468516} step=12654
2022-04-20 18:10.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.12 [info     ] CQL_20220420180428: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00030983609762805247, 'time_algorithm_update': 0.024308157246015226, 'temp_loss': -0.01731460618824638, 'temp': 0.8857222586347345, 'alpha_loss': 7.298528991944608, 'alpha': 0.3468813387274045, 'critic_loss': 1184.3033142089844, 'actor_loss': 56.48546367221408, 'time_step': 0.024717525431984348, 'td_error': 128.94542834167632, 'init_value': -127.47499084472656, 'ave_value': -48.43440462781502} step=12996
2022-04-20 18:10.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.21 [info     ] CQL_20220420180428: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00030603882862113374, 'time_algorithm_update': 0.024073112080668844, 'temp_loss': -0.04019444580707285, 'temp': 0.8883793578858961, 'alpha_loss': 7.472034239629556, 'alpha': 0.3364493722281261, 'critic_loss': 1223.8618162277846, 'actor_loss': 58.49132996274714, 'time_step': 0.024473111531887835, 'td_error': 183.98153361284776, 'init_value': -130.87185668945312, 'ave_value': -49.39674605471594} step=13338
2022-04-20 18:10.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.30 [info     ] CQL_20220420180428: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.000313694016975269, 'time_algorithm_update': 0.02415531281142207, 'temp_loss': -0.031166242958422293, 'temp': 0.8912713262769911, 'alpha_loss': 7.3609018130609165, 'alpha': 0.32610600419908936, 'critic_loss': 1264.7169553522479, 'actor_loss': 60.47175133019163, 'time_step': 0.02456630391684192, 'td_error': 171.5258291673373, 'init_value': -136.84420776367188, 'ave_value': -51.0981329469423} step=13680
2022-04-20 18:10.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.39 [info     ] CQL_20220420180428: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003110212192200778, 'time_algorithm_update': 0.024964957906488786, 'temp_loss': -0.08029668329948048, 'temp': 0.8958264449883623, 'alpha_loss': 7.70945073568333, 'alpha': 0.3157535109952179, 'critic_loss': 1301.5661228469937, 'actor_loss': 62.38407378168831, 'time_step': 0.02537411555909274, 'td_error': 200.81835431040474, 'init_value': -140.9559326171875, 'ave_value': -53.61263036070643} step=14022
2022-04-20 18:10.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.48 [info     ] CQL_20220420180428: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003098367947584007, 'time_algorithm_update': 0.025623185592785216, 'temp_loss': -0.024402367953232855, 'temp': 0.8988018565707736, 'alpha_loss': 7.3239951642633185, 'alpha': 0.3055333014991548, 'critic_loss': 1347.3233121459248, 'actor_loss': 64.5943332136723, 'time_step': 0.02603218011688768, 'td_error': 181.6176562139508, 'init_value': -146.8651580810547, 'ave_value': -55.65253132993037} step=14364
2022-04-20 18:10.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:10.58 [info     ] CQL_20220420180428: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003088022533215974, 'time_algorithm_update': 0.02573748569042362, 'temp_loss': -0.04145103789408479, 'temp': 0.9022369459707137, 'alpha_loss': 7.334089545478598, 'alpha': 0.29588958594882697, 'critic_loss': 1386.8980944895604, 'actor_loss': 66.46967036821688, 'time_step': 0.02614246056093807, 'td_error': 200.7275113787684, 'init_value': -153.63137817382812, 'ave_value': -58.492078630645} step=14706
2022-04-20 18:10.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.07 [info     ] CQL_20220420180428: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00031184453016136124, 'time_algorithm_update': 0.02571516357667265, 'temp_loss': -0.016225207956475123, 'temp': 0.9047336789251071, 'alpha_loss': 6.530157920909904, 'alpha': 0.28659313368169886, 'critic_loss': 1428.0921677260371, 'actor_loss': 68.39427341215792, 'time_step': 0.026129611054359125, 'td_error': 168.03018048391232, 'init_value': -157.98257446289062, 'ave_value': -60.08247020547454} step=15048
2022-04-20 18:11.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.16 [info     ] CQL_20220420180428: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00030888102905094974, 'time_algorithm_update': 0.025618687707778306, 'temp_loss': 0.005453434549489914, 'temp': 0.9042964727557891, 'alpha_loss': 6.610480724022403, 'alpha': 0.27825717819713014, 'critic_loss': 1469.3577309770194, 'actor_loss': 70.4271334597939, 'time_step': 0.026026423911602176, 'td_error': 226.9416951109679, 'init_value': -162.9724884033203, 'ave_value': -61.98918771153098} step=15390
2022-04-20 18:11.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.26 [info     ] CQL_20220420180428: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00030735710210967485, 'time_algorithm_update': 0.025652069794504267, 'temp_loss': -0.018046840605985003, 'temp': 0.9054902753634759, 'alpha_loss': 6.727833807816979, 'alpha': 0.2694836494169737, 'critic_loss': 1509.0615159419544, 'actor_loss': 72.28281118158708, 'time_step': 0.02605868919551024, 'td_error': 206.13880457341818, 'init_value': -164.21319580078125, 'ave_value': -61.33932658008627} step=15732
2022-04-20 18:11.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.35 [info     ] CQL_20220420180428: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003098549201474552, 'time_algorithm_update': 0.025688602910404316, 'temp_loss': -0.018792659087836394, 'temp': 0.9063362995086358, 'alpha_loss': 6.444880518996925, 'alpha': 0.26110269965832694, 'critic_loss': 1548.5207076937133, 'actor_loss': 74.27041634899831, 'time_step': 0.026098071483143588, 'td_error': 234.67037889614312, 'init_value': -171.5958251953125, 'ave_value': -65.16801049082129} step=16074
2022-04-20 18:11.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.44 [info     ] CQL_20220420180428: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00031182501051161025, 'time_algorithm_update': 0.02567849800600643, 'temp_loss': -0.004614735359859745, 'temp': 0.9073843565600658, 'alpha_loss': 6.321442369132014, 'alpha': 0.2529429493156093, 'critic_loss': 1587.5831380922195, 'actor_loss': 76.02277778045476, 'time_step': 0.02609140621988397, 'td_error': 253.1417403078492, 'init_value': -177.21067810058594, 'ave_value': -67.15342374012278} step=16416
2022-04-20 18:11.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:11.54 [info     ] CQL_20220420180428: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00030867886124995715, 'time_algorithm_update': 0.02556559495758592, 'temp_loss': -0.01416016159829689, 'temp': 0.9085121048472778, 'alpha_loss': 6.138972642128928, 'alpha': 0.24507057122145479, 'critic_loss': 1626.2017686631943, 'actor_loss': 78.0156792757804, 'time_step': 0.025974465392486393, 'td_error': 230.83108095243693, 'init_value': -181.7858428955078, 'ave_value': -68.6812989826138} step=16758
2022-04-20 18:11.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:12.03 [info     ] CQL_20220420180428: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00030993021022506623, 'time_algorithm_update': 0.025972926128677458, 'temp_loss': 0.04003137688299543, 'temp': 0.907205796555469, 'alpha_loss': 5.751391559316401, 'alpha': 0.23766747169327318, 'critic_loss': 1667.4069260268184, 'actor_loss': 79.92229133739806, 'time_step': 0.026381378285368982, 'td_error': 249.63713147991697, 'init_value': -186.51052856445312, 'ave_value': -71.11112707868352} step=17100
2022-04-20 18:12.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420180428/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:12.04 [info     ] FQE_20220420181203: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015759037201663098, 'time_algorithm_update': 0.003864361579159656, 'loss': 0.007182935984939875, 'time_step': 0.004092306975858757, 'init_value': -0.6052467823028564, 'ave_value': -0.5556265398859978, 'soft_opc': nan} step=166




2022-04-20 18:12.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.05 [info     ] FQE_20220420181203: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015591138816741575, 'time_algorithm_update': 0.0045448110764285165, 'loss': 0.006036171828486384, 'time_step': 0.004774000271257147, 'init_value': -0.7380263209342957, 'ave_value': -0.6390516607208295, 'soft_opc': nan} step=332




2022-04-20 18:12.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.06 [info     ] FQE_20220420181203: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015412324882415403, 'time_algorithm_update': 0.004499673843383789, 'loss': 0.005362736798979971, 'time_step': 0.0047242153121764404, 'init_value': -0.7720921039581299, 'ave_value': -0.6502759668808262, 'soft_opc': nan} step=498




2022-04-20 18:12.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.07 [info     ] FQE_20220420181203: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015645860189414886, 'time_algorithm_update': 0.004523495593702936, 'loss': 0.0051782346843932585, 'time_step': 0.004755917801914445, 'init_value': -0.8355947732925415, 'ave_value': -0.6784385355944569, 'soft_opc': nan} step=664




2022-04-20 18:12.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.07 [info     ] FQE_20220420181203: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015104822365634413, 'time_algorithm_update': 0.0037835190095097185, 'loss': 0.004867278392909162, 'time_step': 0.003999589437461761, 'init_value': -0.9035918712615967, 'ave_value': -0.7242740502362852, 'soft_opc': nan} step=830




2022-04-20 18:12.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.08 [info     ] FQE_20220420181203: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015846648848200418, 'time_algorithm_update': 0.004528868629271726, 'loss': 0.004686753940207502, 'time_step': 0.004757148673735469, 'init_value': -0.9336686134338379, 'ave_value': -0.733867177212829, 'soft_opc': nan} step=996




2022-04-20 18:12.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.09 [info     ] FQE_20220420181203: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015677170581128224, 'time_algorithm_update': 0.004434713398117617, 'loss': 0.004580415013885821, 'time_step': 0.004662299730691565, 'init_value': -0.9821861982345581, 'ave_value': -0.7623713669610453, 'soft_opc': nan} step=1162




2022-04-20 18:12.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.10 [info     ] FQE_20220420181203: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016074582754847515, 'time_algorithm_update': 0.0037255172269890107, 'loss': 0.004318591598589647, 'time_step': 0.0039605088980801135, 'init_value': -1.029706358909607, 'ave_value': -0.7976950098735255, 'soft_opc': nan} step=1328




2022-04-20 18:12.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.11 [info     ] FQE_20220420181203: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001560981015124953, 'time_algorithm_update': 0.004490589521017419, 'loss': 0.004171397022240941, 'time_step': 0.0047195374247539475, 'init_value': -1.045454502105713, 'ave_value': -0.7901294390293392, 'soft_opc': nan} step=1494




2022-04-20 18:12.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.12 [info     ] FQE_20220420181203: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015604208750897143, 'time_algorithm_update': 0.004509064088384789, 'loss': 0.004237198204234108, 'time_step': 0.004737680216869676, 'init_value': -1.1129329204559326, 'ave_value': -0.8218151204623617, 'soft_opc': nan} step=1660




2022-04-20 18:12.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.12 [info     ] FQE_20220420181203: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001554201884442065, 'time_algorithm_update': 0.004449150648461767, 'loss': 0.004032601003467767, 'time_step': 0.0046767527798572215, 'init_value': -1.150589108467102, 'ave_value': -0.8337718176680642, 'soft_opc': nan} step=1826




2022-04-20 18:12.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.13 [info     ] FQE_20220420181203: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001552492739206337, 'time_algorithm_update': 0.0037015461059937992, 'loss': 0.004120876554663417, 'time_step': 0.003928134240299822, 'init_value': -1.1700923442840576, 'ave_value': -0.8280857515294809, 'soft_opc': nan} step=1992




2022-04-20 18:12.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.14 [info     ] FQE_20220420181203: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015534837561917593, 'time_algorithm_update': 0.004519416625241199, 'loss': 0.004394633910080126, 'time_step': 0.004748197923223656, 'init_value': -1.2535591125488281, 'ave_value': -0.8933261649297164, 'soft_opc': nan} step=2158




2022-04-20 18:12.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.15 [info     ] FQE_20220420181203: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015833004411444607, 'time_algorithm_update': 0.004573316459196159, 'loss': 0.004400411148061864, 'time_step': 0.004806877618812653, 'init_value': -1.3420199155807495, 'ave_value': -0.9689859406636642, 'soft_opc': nan} step=2324




2022-04-20 18:12.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.16 [info     ] FQE_20220420181203: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015657781118369964, 'time_algorithm_update': 0.003844212336712573, 'loss': 0.0045504347626007646, 'time_step': 0.004073543720934765, 'init_value': -1.386974573135376, 'ave_value': -0.9840693563491375, 'soft_opc': nan} step=2490




2022-04-20 18:12.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.17 [info     ] FQE_20220420181203: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016107473028711527, 'time_algorithm_update': 0.0045037686106670334, 'loss': 0.004719490238678563, 'time_step': 0.004736574299364205, 'init_value': -1.4803035259246826, 'ave_value': -1.073978785210633, 'soft_opc': nan} step=2656




2022-04-20 18:12.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.17 [info     ] FQE_20220420181203: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001542912908347256, 'time_algorithm_update': 0.0044858053506138815, 'loss': 0.004823665953022484, 'time_step': 0.004708371966718191, 'init_value': -1.5237551927566528, 'ave_value': -1.0716449613976586, 'soft_opc': nan} step=2822




2022-04-20 18:12.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.18 [info     ] FQE_20220420181203: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015807582671383777, 'time_algorithm_update': 0.004576760602284627, 'loss': 0.005046435348487583, 'time_step': 0.004805682653404144, 'init_value': -1.5559096336364746, 'ave_value': -1.0790937442231823, 'soft_opc': nan} step=2988




2022-04-20 18:12.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.19 [info     ] FQE_20220420181203: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001552736902811441, 'time_algorithm_update': 0.0037875448364809335, 'loss': 0.005541878363891807, 'time_step': 0.004013228129191571, 'init_value': -1.6268993616104126, 'ave_value': -1.118208501105373, 'soft_opc': nan} step=3154




2022-04-20 18:12.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.20 [info     ] FQE_20220420181203: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016088658068553512, 'time_algorithm_update': 0.004491472818765296, 'loss': 0.005624067910009973, 'time_step': 0.004723574741777167, 'init_value': -1.680408239364624, 'ave_value': -1.1496890029525972, 'soft_opc': nan} step=3320




2022-04-20 18:12.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.21 [info     ] FQE_20220420181203: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015638678906911826, 'time_algorithm_update': 0.004526929682995899, 'loss': 0.005833826964042228, 'time_step': 0.004758511681154549, 'init_value': -1.7503094673156738, 'ave_value': -1.1952008022448501, 'soft_opc': nan} step=3486




2022-04-20 18:12.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.21 [info     ] FQE_20220420181203: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015709055475441805, 'time_algorithm_update': 0.0037980711603739174, 'loss': 0.006492532997992428, 'time_step': 0.004030662846852498, 'init_value': -1.8052864074707031, 'ave_value': -1.2160401948400446, 'soft_opc': nan} step=3652




2022-04-20 18:12.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.22 [info     ] FQE_20220420181203: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001583702592964632, 'time_algorithm_update': 0.004602850201618241, 'loss': 0.006567967671996364, 'time_step': 0.004834443689828895, 'init_value': -1.900676965713501, 'ave_value': -1.3046393561067882, 'soft_opc': nan} step=3818




2022-04-20 18:12.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.23 [info     ] FQE_20220420181203: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016101009874458773, 'time_algorithm_update': 0.004500610282622188, 'loss': 0.007080800505916038, 'time_step': 0.004733793706779021, 'init_value': -1.9003030061721802, 'ave_value': -1.252616199246935, 'soft_opc': nan} step=3984




2022-04-20 18:12.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.24 [info     ] FQE_20220420181203: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015999466539865517, 'time_algorithm_update': 0.004535117781305888, 'loss': 0.0073891200163087485, 'time_step': 0.004770543201860175, 'init_value': -1.9759167432785034, 'ave_value': -1.300056173122144, 'soft_opc': nan} step=4150




2022-04-20 18:12.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.25 [info     ] FQE_20220420181203: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015700294310788074, 'time_algorithm_update': 0.0037799226232321866, 'loss': 0.007665763855266598, 'time_step': 0.004010497805583908, 'init_value': -2.0887444019317627, 'ave_value': -1.381507192417845, 'soft_opc': nan} step=4316




2022-04-20 18:12.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.26 [info     ] FQE_20220420181203: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015729450317750494, 'time_algorithm_update': 0.004526452845837696, 'loss': 0.007951906241942197, 'time_step': 0.004755564482815294, 'init_value': -2.129716157913208, 'ave_value': -1.3813245114375343, 'soft_opc': nan} step=4482




2022-04-20 18:12.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.26 [info     ] FQE_20220420181203: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001570201781858881, 'time_algorithm_update': 0.0044850197183080465, 'loss': 0.008678645519708025, 'time_step': 0.004715643733380789, 'init_value': -2.2179722785949707, 'ave_value': -1.4378972915311654, 'soft_opc': nan} step=4648




2022-04-20 18:12.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.27 [info     ] FQE_20220420181203: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015662808016122105, 'time_algorithm_update': 0.0037609898900411217, 'loss': 0.009351269826601562, 'time_step': 0.00398789543703378, 'init_value': -2.306058883666992, 'ave_value': -1.504954463001844, 'soft_opc': nan} step=4814




2022-04-20 18:12.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.28 [info     ] FQE_20220420181203: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015974044799804688, 'time_algorithm_update': 0.004541316664362529, 'loss': 0.009808998752009482, 'time_step': 0.00477348896394293, 'init_value': -2.3551459312438965, 'ave_value': -1.5409031486457532, 'soft_opc': nan} step=4980




2022-04-20 18:12.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.29 [info     ] FQE_20220420181203: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016215910394507717, 'time_algorithm_update': 0.004506445792784174, 'loss': 0.010321492402174746, 'time_step': 0.00474264248307929, 'init_value': -2.463615894317627, 'ave_value': -1.5992642747120813, 'soft_opc': nan} step=5146




2022-04-20 18:12.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.30 [info     ] FQE_20220420181203: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016206143850303558, 'time_algorithm_update': 0.004582043153693877, 'loss': 0.010841227906576571, 'time_step': 0.0048178965786853465, 'init_value': -2.5838794708251953, 'ave_value': -1.6917846298902421, 'soft_opc': nan} step=5312




2022-04-20 18:12.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.31 [info     ] FQE_20220420181203: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.000159292335969856, 'time_algorithm_update': 0.003790566720158221, 'loss': 0.011490678207118469, 'time_step': 0.004021327179598521, 'init_value': -2.64168119430542, 'ave_value': -1.7136212127769852, 'soft_opc': nan} step=5478




2022-04-20 18:12.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.31 [info     ] FQE_20220420181203: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001629389912249094, 'time_algorithm_update': 0.004530167005148278, 'loss': 0.01242927024358356, 'time_step': 0.004765425819948495, 'init_value': -2.6948599815368652, 'ave_value': -1.713457720082354, 'soft_opc': nan} step=5644




2022-04-20 18:12.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.32 [info     ] FQE_20220420181203: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015760186206863587, 'time_algorithm_update': 0.004585892321115516, 'loss': 0.013236425265793252, 'time_step': 0.004815661763570395, 'init_value': -2.8183441162109375, 'ave_value': -1.8074704264668193, 'soft_opc': nan} step=5810




2022-04-20 18:12.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.33 [info     ] FQE_20220420181203: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015620294823703995, 'time_algorithm_update': 0.0038124050002500236, 'loss': 0.013683662215438503, 'time_step': 0.004042509090469544, 'init_value': -2.8920950889587402, 'ave_value': -1.8369926295033445, 'soft_opc': nan} step=5976




2022-04-20 18:12.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.34 [info     ] FQE_20220420181203: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001590137022087373, 'time_algorithm_update': 0.004536309874201396, 'loss': 0.013930402903716606, 'time_step': 0.004769720226885325, 'init_value': -2.910815477371216, 'ave_value': -1.839549657762856, 'soft_opc': nan} step=6142




2022-04-20 18:12.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.35 [info     ] FQE_20220420181203: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015832573534494424, 'time_algorithm_update': 0.004503459815519402, 'loss': 0.014534384006611913, 'time_step': 0.0047324005379734265, 'init_value': -2.964846134185791, 'ave_value': -1.8482835896715926, 'soft_opc': nan} step=6308




2022-04-20 18:12.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.36 [info     ] FQE_20220420181203: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016397596841835114, 'time_algorithm_update': 0.004537323871290827, 'loss': 0.01529672201821042, 'time_step': 0.004773048033197242, 'init_value': -2.9931623935699463, 'ave_value': -1.8587860791398598, 'soft_opc': nan} step=6474




2022-04-20 18:12.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.36 [info     ] FQE_20220420181203: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015671281929475716, 'time_algorithm_update': 0.003689945462238358, 'loss': 0.015901846339202386, 'time_step': 0.003919783845005265, 'init_value': -3.071364402770996, 'ave_value': -1.9179934783696055, 'soft_opc': nan} step=6640




2022-04-20 18:12.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.37 [info     ] FQE_20220420181203: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015888443912368222, 'time_algorithm_update': 0.004511142351541175, 'loss': 0.01616283963190347, 'time_step': 0.0047453009938619225, 'init_value': -3.145328998565674, 'ave_value': -1.9744554196929072, 'soft_opc': nan} step=6806




2022-04-20 18:12.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.38 [info     ] FQE_20220420181203: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015794369111578148, 'time_algorithm_update': 0.004557421408503889, 'loss': 0.016992356519478495, 'time_step': 0.004789269114115152, 'init_value': -3.1752049922943115, 'ave_value': -1.9906310648598649, 'soft_opc': nan} step=6972




2022-04-20 18:12.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.39 [info     ] FQE_20220420181203: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.000154167772775673, 'time_algorithm_update': 0.003928632621305534, 'loss': 0.017580715855948222, 'time_step': 0.004154491137309247, 'init_value': -3.1899075508117676, 'ave_value': -1.9679956843858366, 'soft_opc': nan} step=7138




2022-04-20 18:12.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.40 [info     ] FQE_20220420181203: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001641052315034062, 'time_algorithm_update': 0.004504781171499965, 'loss': 0.017757958388909787, 'time_step': 0.004743130810289498, 'init_value': -3.291861057281494, 'ave_value': -2.01983623142178, 'soft_opc': nan} step=7304




2022-04-20 18:12.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.41 [info     ] FQE_20220420181203: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016130165881421193, 'time_algorithm_update': 0.004573764571224351, 'loss': 0.01858777872494321, 'time_step': 0.004809730024222868, 'init_value': -3.2747859954833984, 'ave_value': -1.9769730509871297, 'soft_opc': nan} step=7470




2022-04-20 18:12.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.41 [info     ] FQE_20220420181203: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015806002789233104, 'time_algorithm_update': 0.004583861454423651, 'loss': 0.01900287242241608, 'time_step': 0.004816450268389231, 'init_value': -3.337326765060425, 'ave_value': -1.983201674090044, 'soft_opc': nan} step=7636




2022-04-20 18:12.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.42 [info     ] FQE_20220420181203: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016379643635577466, 'time_algorithm_update': 0.0037748397114765212, 'loss': 0.019256598393275707, 'time_step': 0.004009143415703831, 'init_value': -3.420499801635742, 'ave_value': -2.0413401495497507, 'soft_opc': nan} step=7802




2022-04-20 18:12.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.43 [info     ] FQE_20220420181203: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001556744058448148, 'time_algorithm_update': 0.004530447075165898, 'loss': 0.020817212259021015, 'time_step': 0.004759758351797081, 'init_value': -3.380537748336792, 'ave_value': -1.9779014400802217, 'soft_opc': nan} step=7968




2022-04-20 18:12.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.44 [info     ] FQE_20220420181203: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016229267579963408, 'time_algorithm_update': 0.004574317529977086, 'loss': 0.02142660465955465, 'time_step': 0.004809566290981798, 'init_value': -3.540722370147705, 'ave_value': -2.1354927492974043, 'soft_opc': nan} step=8134




2022-04-20 18:12.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:12.45 [info     ] FQE_20220420181203: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015251033277396695, 'time_algorithm_update': 0.003805531076638095, 'loss': 0.021700171257947088, 'time_step': 0.004028739699398179, 'init_value': -3.4977712631225586, 'ave_value': -2.0444731068033892, 'soft_opc': nan} step=8300




2022-04-20 18:12.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181203/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 18:12.45 [debug    ] RoundIterator is selected.
2022-04-20 18:12.45 [info     ] Directory is created at d3rlpy_logs/FQE_20220420181245
2022-04-20 18:12.45 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:12.45 [debug    ] Building models...
2022-04-20 18:12.45 [debug    ] Models have been built.
2022-04-20 18:12.45 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420181245/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:12.47 [info     ] FQE_20220420181245: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001657528932704482, 'time_algorithm_update': 0.004590065673340199, 'loss': 0.026623992872056225, 'time_step': 0.004831214283787927, 'init_value': -0.615872859954834, 'ave_value': -0.6578802895941981, 'soft_opc': nan} step=344




2022-04-20 18:12.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.49 [info     ] FQE_20220420181245: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016549229621887207, 'time_algorithm_update': 0.004123963588892028, 'loss': 0.02256004809033732, 'time_step': 0.004360548978628114, 'init_value': -1.3532395362854004, 'ave_value': -1.4233435177521125, 'soft_opc': nan} step=688




2022-04-20 18:12.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.50 [info     ] FQE_20220420181245: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001654146715652111, 'time_algorithm_update': 0.004159877466600995, 'loss': 0.024442480211037884, 'time_step': 0.004397063754325689, 'init_value': -2.2090206146240234, 'ave_value': -2.3312446195531535, 'soft_opc': nan} step=1032




2022-04-20 18:12.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.52 [info     ] FQE_20220420181245: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001653848692428234, 'time_algorithm_update': 0.004467851200769114, 'loss': 0.026671084393963738, 'time_step': 0.0047053202640178595, 'init_value': -2.7466721534729004, 'ave_value': -2.9056468769706583, 'soft_opc': nan} step=1376




2022-04-20 18:12.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.54 [info     ] FQE_20220420181245: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016741281331971635, 'time_algorithm_update': 0.004098046657650969, 'loss': 0.033286524067502904, 'time_step': 0.004338044066761815, 'init_value': -3.508521556854248, 'ave_value': -3.741367918279794, 'soft_opc': nan} step=1720




2022-04-20 18:12.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.55 [info     ] FQE_20220420181245: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016478951587233434, 'time_algorithm_update': 0.004481224126594011, 'loss': 0.03958241708121847, 'time_step': 0.004718666159829428, 'init_value': -3.9673352241516113, 'ave_value': -4.240291709634098, 'soft_opc': nan} step=2064




2022-04-20 18:12.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.57 [info     ] FQE_20220420181245: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016858342082001442, 'time_algorithm_update': 0.004103365332581276, 'loss': 0.049048615497710224, 'time_step': 0.004344054432802422, 'init_value': -4.602118968963623, 'ave_value': -4.925072318221535, 'soft_opc': nan} step=2408




2022-04-20 18:12.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:12.59 [info     ] FQE_20220420181245: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016714805780455123, 'time_algorithm_update': 0.004474440979403119, 'loss': 0.06079144826223857, 'time_step': 0.004713123620942582, 'init_value': -4.854635715484619, 'ave_value': -5.256418270597587, 'soft_opc': nan} step=2752




2022-04-20 18:12.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.00 [info     ] FQE_20220420181245: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016986353452815565, 'time_algorithm_update': 0.004245809344358222, 'loss': 0.0722083100198963, 'time_step': 0.004489860562391059, 'init_value': -5.120675086975098, 'ave_value': -5.564939188480646, 'soft_opc': nan} step=3096




2022-04-20 18:13.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.02 [info     ] FQE_20220420181245: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016689577768015307, 'time_algorithm_update': 0.0041633684967839445, 'loss': 0.08712560163139431, 'time_step': 0.004402667977089106, 'init_value': -5.647125720977783, 'ave_value': -6.203335561685742, 'soft_opc': nan} step=3440




2022-04-20 18:13.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.04 [info     ] FQE_20220420181245: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016426347022832825, 'time_algorithm_update': 0.004497650057770485, 'loss': 0.09759882857568215, 'time_step': 0.004733604054118312, 'init_value': -5.973702430725098, 'ave_value': -6.608194855231632, 'soft_opc': nan} step=3784




2022-04-20 18:13.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.05 [info     ] FQE_20220420181245: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001684288645899573, 'time_algorithm_update': 0.0041161997373714, 'loss': 0.11544515296470287, 'time_step': 0.004354062468506569, 'init_value': -6.3940277099609375, 'ave_value': -7.202152478806506, 'soft_opc': nan} step=4128




2022-04-20 18:13.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.07 [info     ] FQE_20220420181245: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016448456187580907, 'time_algorithm_update': 0.004482979691305826, 'loss': 0.1301760236938419, 'time_step': 0.004717458126156829, 'init_value': -6.681342124938965, 'ave_value': -7.578976575224786, 'soft_opc': nan} step=4472




2022-04-20 18:13.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.09 [info     ] FQE_20220420181245: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001661812150201132, 'time_algorithm_update': 0.004080879826878392, 'loss': 0.15110525963719673, 'time_step': 0.004317146401072658, 'init_value': -7.137371063232422, 'ave_value': -8.126750474013933, 'soft_opc': nan} step=4816




2022-04-20 18:13.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.11 [info     ] FQE_20220420181245: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001630748427191446, 'time_algorithm_update': 0.004462959461433943, 'loss': 0.16541669334198328, 'time_step': 0.004695784907008327, 'init_value': -7.543946266174316, 'ave_value': -8.600076150584982, 'soft_opc': nan} step=5160




2022-04-20 18:13.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.12 [info     ] FQE_20220420181245: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016243651855823605, 'time_algorithm_update': 0.004260087429090988, 'loss': 0.18095049683849307, 'time_step': 0.004496809355048246, 'init_value': -7.819049835205078, 'ave_value': -8.948596349992027, 'soft_opc': nan} step=5504




2022-04-20 18:13.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.14 [info     ] FQE_20220420181245: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016900065333344216, 'time_algorithm_update': 0.004113116236620171, 'loss': 0.20128881382799252, 'time_step': 0.0043578140957410945, 'init_value': -8.043251991271973, 'ave_value': -9.220096581064894, 'soft_opc': nan} step=5848




2022-04-20 18:13.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.16 [info     ] FQE_20220420181245: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016898748486541038, 'time_algorithm_update': 0.00454279572464699, 'loss': 0.216213071621348, 'time_step': 0.0047868379326753835, 'init_value': -8.28930377960205, 'ave_value': -9.586404870967339, 'soft_opc': nan} step=6192




2022-04-20 18:13.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.17 [info     ] FQE_20220420181245: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016592962797297987, 'time_algorithm_update': 0.003469816474027412, 'loss': 0.23059802510515723, 'time_step': 0.003709902597028156, 'init_value': -8.462909698486328, 'ave_value': -9.928634131976471, 'soft_opc': nan} step=6536




2022-04-20 18:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.18 [info     ] FQE_20220420181245: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016194997831832532, 'time_algorithm_update': 0.0033579524173293004, 'loss': 0.23853683024428265, 'time_step': 0.0035943000815635505, 'init_value': -8.42490291595459, 'ave_value': -10.001822727632936, 'soft_opc': nan} step=6880




2022-04-20 18:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.20 [info     ] FQE_20220420181245: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016229374464168104, 'time_algorithm_update': 0.0033626189065534013, 'loss': 0.2529313427905097, 'time_step': 0.0035970543706139854, 'init_value': -8.838643074035645, 'ave_value': -10.457065516666653, 'soft_opc': nan} step=7224




2022-04-20 18:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.21 [info     ] FQE_20220420181245: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016290503878926122, 'time_algorithm_update': 0.003438075614529987, 'loss': 0.2681489069079764, 'time_step': 0.0036731639573740404, 'init_value': -9.167562484741211, 'ave_value': -10.85856353199164, 'soft_opc': nan} step=7568




2022-04-20 18:13.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.22 [info     ] FQE_20220420181245: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016429604485977526, 'time_algorithm_update': 0.003441863974859548, 'loss': 0.2895855885865383, 'time_step': 0.0036807919657507607, 'init_value': -9.350418090820312, 'ave_value': -11.107321040575636, 'soft_opc': nan} step=7912




2022-04-20 18:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.24 [info     ] FQE_20220420181245: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.000166553397511327, 'time_algorithm_update': 0.0034888553064922954, 'loss': 0.30154623143743114, 'time_step': 0.003727482501850572, 'init_value': -9.684087753295898, 'ave_value': -11.513552354791344, 'soft_opc': nan} step=8256




2022-04-20 18:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.25 [info     ] FQE_20220420181245: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016391485236411872, 'time_algorithm_update': 0.0033964251363000205, 'loss': 0.3152681471790772, 'time_step': 0.0036338574664537297, 'init_value': -10.114828109741211, 'ave_value': -11.890907490488377, 'soft_opc': nan} step=8600




2022-04-20 18:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.27 [info     ] FQE_20220420181245: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001635766306588816, 'time_algorithm_update': 0.0033597564974496533, 'loss': 0.3363884010048973, 'time_step': 0.003595777722292168, 'init_value': -10.556925773620605, 'ave_value': -12.417951558088163, 'soft_opc': nan} step=8944




2022-04-20 18:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.28 [info     ] FQE_20220420181245: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016700181850167207, 'time_algorithm_update': 0.003435692814893501, 'loss': 0.3468091803559557, 'time_step': 0.003676711819892706, 'init_value': -10.708775520324707, 'ave_value': -12.59460671162988, 'soft_opc': nan} step=9288




2022-04-20 18:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.29 [info     ] FQE_20220420181245: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016588249871897143, 'time_algorithm_update': 0.0033865952214529346, 'loss': 0.36421476366258293, 'time_step': 0.003623755172241566, 'init_value': -10.525362014770508, 'ave_value': -12.378114612264609, 'soft_opc': nan} step=9632




2022-04-20 18:13.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.31 [info     ] FQE_20220420181245: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016355237295461256, 'time_algorithm_update': 0.003433457640714424, 'loss': 0.37897514398642923, 'time_step': 0.003669144109238026, 'init_value': -10.876922607421875, 'ave_value': -12.725988553869664, 'soft_opc': nan} step=9976




2022-04-20 18:13.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.32 [info     ] FQE_20220420181245: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001657022986301156, 'time_algorithm_update': 0.0034279781718586765, 'loss': 0.39119940089754934, 'time_step': 0.0036655747613241504, 'init_value': -11.023911476135254, 'ave_value': -12.948111159792003, 'soft_opc': nan} step=10320




2022-04-20 18:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.33 [info     ] FQE_20220420181245: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016713211702746014, 'time_algorithm_update': 0.003435468257859696, 'loss': 0.4095244595705163, 'time_step': 0.003675172495287518, 'init_value': -10.85886287689209, 'ave_value': -12.887049767882395, 'soft_opc': nan} step=10664




2022-04-20 18:13.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.35 [info     ] FQE_20220420181245: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001668867676757103, 'time_algorithm_update': 0.003435103699218395, 'loss': 0.4201881969889063, 'time_step': 0.0036720508752867234, 'init_value': -11.383216857910156, 'ave_value': -13.428423480509764, 'soft_opc': nan} step=11008




2022-04-20 18:13.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.36 [info     ] FQE_20220420181245: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016443604646727096, 'time_algorithm_update': 0.00347253680229187, 'loss': 0.4311957699679878, 'time_step': 0.0037098838839420053, 'init_value': -11.715800285339355, 'ave_value': -13.78318383334255, 'soft_opc': nan} step=11352




2022-04-20 18:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.38 [info     ] FQE_20220420181245: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016368128532587097, 'time_algorithm_update': 0.003448299197263496, 'loss': 0.4444779049345227, 'time_step': 0.0036865666855213255, 'init_value': -11.773609161376953, 'ave_value': -13.910335641264336, 'soft_opc': nan} step=11696




2022-04-20 18:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.39 [info     ] FQE_20220420181245: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016610220421192258, 'time_algorithm_update': 0.0034528222194937772, 'loss': 0.4554828146667501, 'time_step': 0.0036896259285682854, 'init_value': -12.000334739685059, 'ave_value': -14.241508743337247, 'soft_opc': nan} step=12040




2022-04-20 18:13.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.40 [info     ] FQE_20220420181245: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016367158224416334, 'time_algorithm_update': 0.003397459207579147, 'loss': 0.4574923133964889, 'time_step': 0.0036338990510896194, 'init_value': -12.03245735168457, 'ave_value': -14.35593564133055, 'soft_opc': nan} step=12384




2022-04-20 18:13.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.42 [info     ] FQE_20220420181245: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016307622887367425, 'time_algorithm_update': 0.0034224099891130315, 'loss': 0.47544667659175777, 'time_step': 0.0036584187385647797, 'init_value': -12.231802940368652, 'ave_value': -14.795855816082005, 'soft_opc': nan} step=12728




2022-04-20 18:13.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.43 [info     ] FQE_20220420181245: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016436119412266932, 'time_algorithm_update': 0.0034311191980228866, 'loss': 0.48469417509731166, 'time_step': 0.003669605005619138, 'init_value': -12.14278507232666, 'ave_value': -14.738653286845793, 'soft_opc': nan} step=13072




2022-04-20 18:13.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.44 [info     ] FQE_20220420181245: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016459614731544671, 'time_algorithm_update': 0.0034017091573670852, 'loss': 0.48770226010251355, 'time_step': 0.003638168407040973, 'init_value': -12.005845069885254, 'ave_value': -14.648258189130827, 'soft_opc': nan} step=13416




2022-04-20 18:13.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.46 [info     ] FQE_20220420181245: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016423782636952955, 'time_algorithm_update': 0.003415277530980665, 'loss': 0.5001307422901655, 'time_step': 0.003654211066489996, 'init_value': -12.122688293457031, 'ave_value': -14.798116318518423, 'soft_opc': nan} step=13760




2022-04-20 18:13.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.47 [info     ] FQE_20220420181245: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016512843065483626, 'time_algorithm_update': 0.0034559389879537184, 'loss': 0.5133456168750431, 'time_step': 0.0036951421305190684, 'init_value': -12.453926086425781, 'ave_value': -15.175607516349771, 'soft_opc': nan} step=14104




2022-04-20 18:13.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.49 [info     ] FQE_20220420181245: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001681412375250528, 'time_algorithm_update': 0.0035371988318687263, 'loss': 0.5340405980796489, 'time_step': 0.0037809735120728958, 'init_value': -12.780641555786133, 'ave_value': -15.611879196244098, 'soft_opc': nan} step=14448




2022-04-20 18:13.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.50 [info     ] FQE_20220420181245: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001661625019339628, 'time_algorithm_update': 0.0034908777059510696, 'loss': 0.5455836984437219, 'time_step': 0.0037303934263628585, 'init_value': -13.050060272216797, 'ave_value': -15.817699409831055, 'soft_opc': nan} step=14792




2022-04-20 18:13.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.51 [info     ] FQE_20220420181245: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001643792141315549, 'time_algorithm_update': 0.003448535536610803, 'loss': 0.5679909167161514, 'time_step': 0.0036849545878033306, 'init_value': -13.35671329498291, 'ave_value': -16.017508844373463, 'soft_opc': nan} step=15136




2022-04-20 18:13.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.53 [info     ] FQE_20220420181245: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001641331717025402, 'time_algorithm_update': 0.0034278873787369837, 'loss': 0.5801324451039004, 'time_step': 0.003666233184725739, 'init_value': -13.883377075195312, 'ave_value': -16.4771231007112, 'soft_opc': nan} step=15480




2022-04-20 18:13.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.54 [info     ] FQE_20220420181245: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016520466915396757, 'time_algorithm_update': 0.0029957058817841288, 'loss': 0.5977563579122774, 'time_step': 0.0032310284847436948, 'init_value': -14.354198455810547, 'ave_value': -16.752090712058617, 'soft_opc': nan} step=15824




2022-04-20 18:13.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.55 [info     ] FQE_20220420181245: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016596081644989724, 'time_algorithm_update': 0.0034271395483682324, 'loss': 0.6167680615028583, 'time_step': 0.00366700527279876, 'init_value': -15.033340454101562, 'ave_value': -17.55923609593925, 'soft_opc': nan} step=16168




2022-04-20 18:13.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.57 [info     ] FQE_20220420181245: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016393703083659327, 'time_algorithm_update': 0.003404672755751499, 'loss': 0.6306469167019565, 'time_step': 0.0036391574282978855, 'init_value': -14.960283279418945, 'ave_value': -17.337969285206082, 'soft_opc': nan} step=16512




2022-04-20 18:13.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:13.58 [info     ] FQE_20220420181245: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016182938287424487, 'time_algorithm_update': 0.0034609450850375863, 'loss': 0.6321569965437575, 'time_step': 0.0036965296712032584, 'init_value': -15.162269592285156, 'ave_value': -17.377166847012905, 'soft_opc': nan} step=16856




2022-04-20 18:13.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:14.00 [info     ] FQE_20220420181245: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016760202341301497, 'time_algorithm_update': 0.003453615099884743, 'loss': 0.6388811899652315, 'time_step': 0.0036950568820154944, 'init_value': -15.672141075134277, 'ave_value': -17.800022828478266, 'soft_opc': nan} step=17200




2022-04-20 18:14.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420181245/model_17200.pt
search iteration:  18
using hyper params:  [0.008861440678467232, 0.009829241946167565, 5.900023514589116e-05, 5]
2022-04-20 18:14.00 [debug    ] RoundIterator is selected.
2022-04-20 18:14.00 [info     ] Directory is created at d3rlpy_logs/CQL_20220420181400
2022-04-20 18:14.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:14.00 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:14.00 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420181400/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.008861440678467232, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.08 [info     ] CQL_20220420181400: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00035937278591401394, 'time_algorithm_update': 0.024118609595717044, 'temp_loss': 4.542811706749319, 'temp': 0.9903984327762447, 'alpha_loss': -13.458434301510191, 'alpha': 1.0155528131981342, 'critic_loss': 32.14184312095419, 'actor_loss': 4.554454176136625, 'time_step': 0.02457431673306471, 'td_error': 5.048883072370332, 'init_value': -11.772903442382812, 'ave_value': -7.388086926656531} step=342
2022-04-20 18:14.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.17 [info     ] CQL_20220420181400: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003598621714184856, 'time_algorithm_update': 0.024211797100758693, 'temp_loss': 3.574476018286588, 'temp': 0.9721697549722348, 'alpha_loss': -4.420560254285123, 'alpha': 1.0356051109687627, 'critic_loss': 27.08594941256339, 'actor_loss': 11.27311004672134, 'time_step': 0.024674073994508265, 'td_error': 6.449936006427842, 'init_value': -20.553714752197266, 'ave_value': -12.093146583555384} step=684
2022-04-20 18:14.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.26 [info     ] CQL_20220420181400: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003557832617508738, 'time_algorithm_update': 0.023995845638520537, 'temp_loss': 2.65638610284928, 'temp': 0.9571658904440918, 'alpha_loss': 0.8160534209097948, 'alpha': 1.0413432504698548, 'critic_loss': 54.723499141938504, 'actor_loss': 17.874848273762485, 'time_step': 0.024454629909225374, 'td_error': 10.137149905424856, 'init_value': -29.742284774780273, 'ave_value': -17.13406857888441} step=1026
2022-04-20 18:14.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.35 [info     ] CQL_20220420181400: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003637877124094824, 'time_algorithm_update': 0.02410443623860677, 'temp_loss': 2.0089218058781317, 'temp': 0.9443332725449612, 'alpha_loss': 4.756832312073624, 'alpha': 1.0270342771072833, 'critic_loss': 90.31077419526396, 'actor_loss': 24.319838997913383, 'time_step': 0.024565164805852878, 'td_error': 17.122177114358855, 'init_value': -39.67021560668945, 'ave_value': -23.07927923853937} step=1368
2022-04-20 18:14.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.44 [info     ] CQL_20220420181400: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035652500844141196, 'time_algorithm_update': 0.024022382602357027, 'temp_loss': 1.506709991491329, 'temp': 0.9331828406331135, 'alpha_loss': 7.80790370528461, 'alpha': 0.9939296304831031, 'critic_loss': 129.92297291895102, 'actor_loss': 30.610651930870368, 'time_step': 0.024479052476715623, 'td_error': 22.45996010121521, 'init_value': -49.17795181274414, 'ave_value': -27.99383482688048} step=1710
2022-04-20 18:14.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:14.52 [info     ] CQL_20220420181400: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00036016193746823315, 'time_algorithm_update': 0.024344917626408804, 'temp_loss': 1.1199509824751412, 'temp': 0.9236262829331626, 'alpha_loss': 9.730457749283104, 'alpha': 0.9528582568405665, 'critic_loss': 172.68738752220108, 'actor_loss': 36.70274967617459, 'time_step': 0.024803286407426087, 'td_error': 26.39476962918808, 'init_value': -57.346153259277344, 'ave_value': -33.10424428605684} step=2052
2022-04-20 18:14.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.01 [info     ] CQL_20220420181400: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035587179730510156, 'time_algorithm_update': 0.0243728488509418, 'temp_loss': 0.8173679021889704, 'temp': 0.9154425927421503, 'alpha_loss': 11.154187159231531, 'alpha': 0.9121310141002923, 'critic_loss': 219.76049238059952, 'actor_loss': 42.63564271536487, 'time_step': 0.024830183090522276, 'td_error': 46.01285499433187, 'init_value': -70.3509292602539, 'ave_value': -39.185451921301116} step=2394
2022-04-20 18:15.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.10 [info     ] CQL_20220420181400: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003601988853766904, 'time_algorithm_update': 0.024009394366838778, 'temp_loss': 0.5250336396785682, 'temp': 0.908959365204761, 'alpha_loss': 12.158454447461848, 'alpha': 0.8731499052884286, 'critic_loss': 271.79363223962616, 'actor_loss': 48.585623568261575, 'time_step': 0.02446682341614662, 'td_error': 51.85002608940512, 'init_value': -77.32666015625, 'ave_value': -43.67647926140476} step=2736
2022-04-20 18:15.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.19 [info     ] CQL_20220420181400: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003540578641389546, 'time_algorithm_update': 0.023798948142960755, 'temp_loss': 0.26553358457307197, 'temp': 0.9043746806724727, 'alpha_loss': 13.652004110882855, 'alpha': 0.8371848648751689, 'critic_loss': 328.4814872518618, 'actor_loss': 54.4166078846357, 'time_step': 0.024255839007639744, 'td_error': 72.87564020517678, 'init_value': -86.9939193725586, 'ave_value': -49.002118112999305} step=3078
2022-04-20 18:15.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.28 [info     ] CQL_20220420181400: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035645668966728346, 'time_algorithm_update': 0.024105565589770936, 'temp_loss': 0.12663751411965193, 'temp': 0.9019086740867436, 'alpha_loss': 14.888614311552884, 'alpha': 0.8019872255492628, 'critic_loss': 401.9489782679151, 'actor_loss': 61.01153341371413, 'time_step': 0.024561242053383274, 'td_error': 91.8870706804323, 'init_value': -99.9032974243164, 'ave_value': -53.220691125380164} step=3420
2022-04-20 18:15.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.37 [info     ] CQL_20220420181400: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00036163706528513054, 'time_algorithm_update': 0.025139933441117493, 'temp_loss': -0.12972366907878927, 'temp': 0.9018120467662811, 'alpha_loss': 17.121497260199654, 'alpha': 0.7690227136277316, 'critic_loss': 480.51558832536665, 'actor_loss': 67.5886484893442, 'time_step': 0.025601327070715833, 'td_error': 163.08288319694628, 'init_value': -119.945068359375, 'ave_value': -63.14781791790246} step=3762
2022-04-20 18:15.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.46 [info     ] CQL_20220420181400: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00033713641919587786, 'time_algorithm_update': 0.02432502780044288, 'temp_loss': -0.2928045032959845, 'temp': 0.9061023951971043, 'alpha_loss': 20.20978340070847, 'alpha': 0.7351461472915627, 'critic_loss': 597.4612620392738, 'actor_loss': 76.0288954840766, 'time_step': 0.024752222306547108, 'td_error': 302.82356886682135, 'init_value': -138.73092651367188, 'ave_value': -69.66595864254276} step=4104
2022-04-20 18:15.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:15.54 [info     ] CQL_20220420181400: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034276783814904285, 'time_algorithm_update': 0.024385228491666026, 'temp_loss': -0.0814569058517615, 'temp': 0.9104109584239491, 'alpha_loss': 21.435706665641383, 'alpha': 0.7045923682332736, 'critic_loss': 744.4717653508771, 'actor_loss': 85.03722341437089, 'time_step': 0.024822952454550226, 'td_error': 1026.0927720888194, 'init_value': -182.45669555664062, 'ave_value': -82.93395266253668} step=4446
2022-04-20 18:15.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.04 [info     ] CQL_20220420181400: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035084688175491424, 'time_algorithm_update': 0.0250810078013013, 'temp_loss': -0.06350187052651281, 'temp': 0.9114298803067347, 'alpha_loss': 20.381081404044615, 'alpha': 0.6759586496311322, 'critic_loss': 929.6520721257082, 'actor_loss': 94.33186857881601, 'time_step': 0.02553024905466894, 'td_error': 704.9227058028694, 'init_value': -205.5321502685547, 'ave_value': -87.9333406963233} step=4788
2022-04-20 18:16.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.13 [info     ] CQL_20220420181400: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000358528561062283, 'time_algorithm_update': 0.025504676919234425, 'temp_loss': -0.05695558248948284, 'temp': 0.9137867364967078, 'alpha_loss': 19.00881372976024, 'alpha': 0.6535069994759142, 'critic_loss': 1075.733895820484, 'actor_loss': 102.21428700497276, 'time_step': 0.02596658642529047, 'td_error': 1142.6086188284594, 'init_value': -231.87973022460938, 'ave_value': -93.43713556182144} step=5130
2022-04-20 18:16.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.22 [info     ] CQL_20220420181400: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00036578917363930865, 'time_algorithm_update': 0.025658185021919116, 'temp_loss': -0.052226027849604166, 'temp': 0.9158458997283065, 'alpha_loss': 19.05196199640196, 'alpha': 0.6308051856637699, 'critic_loss': 1258.9798119974416, 'actor_loss': 112.05891623692206, 'time_step': 0.026123909922371135, 'td_error': 679.6019957115176, 'init_value': -263.9901428222656, 'ave_value': -99.1999093560747} step=5472
2022-04-20 18:16.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.32 [info     ] CQL_20220420181400: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00035968091752794053, 'time_algorithm_update': 0.025499239999648423, 'temp_loss': -0.08588238525046417, 'temp': 0.9180210964721546, 'alpha_loss': 18.459643387480785, 'alpha': 0.6114740340333236, 'critic_loss': 1425.9430492021884, 'actor_loss': 121.24610570717974, 'time_step': 0.025960948035033824, 'td_error': 2797.1334369002925, 'init_value': -335.6651916503906, 'ave_value': -115.5432417281952} step=5814
2022-04-20 18:16.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.41 [info     ] CQL_20220420181400: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00036149973060652525, 'time_algorithm_update': 0.02558471515164738, 'temp_loss': 0.06461753606883405, 'temp': 0.91769573319028, 'alpha_loss': 19.088714327379975, 'alpha': 0.5934024115054928, 'critic_loss': 1587.6297132703994, 'actor_loss': 129.35932487353944, 'time_step': 0.02604657307005765, 'td_error': 2602.412743749819, 'init_value': -358.42041015625, 'ave_value': -114.5024840031148} step=6156
2022-04-20 18:16.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:16.50 [info     ] CQL_20220420181400: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003623028247677095, 'time_algorithm_update': 0.025680744159988493, 'temp_loss': 0.011336055720410151, 'temp': 0.917216926638843, 'alpha_loss': 18.302098565544302, 'alpha': 0.5732048607011985, 'critic_loss': 1781.0073370682567, 'actor_loss': 138.51981962772837, 'time_step': 0.02614426612854004, 'td_error': 1736.7305759902379, 'init_value': -394.5504455566406, 'ave_value': -119.90083015672526} step=6498
2022-04-20 18:16.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.00 [info     ] CQL_20220420181400: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003664388991238778, 'time_algorithm_update': 0.025329172959801746, 'temp_loss': 0.02497900162458594, 'temp': 0.9150320697597593, 'alpha_loss': 14.0969415456928, 'alpha': 0.5587986568261308, 'critic_loss': 1880.4419255842242, 'actor_loss': 142.17742844074093, 'time_step': 0.02579528616185774, 'td_error': 4165.671982775328, 'init_value': -452.2767028808594, 'ave_value': -131.80320255692143} step=6840
2022-04-20 18:17.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.09 [info     ] CQL_20220420181400: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003609692144115069, 'time_algorithm_update': 0.025614831182691786, 'temp_loss': 0.026917662890900296, 'temp': 0.9156363738907708, 'alpha_loss': 14.017568777375228, 'alpha': 0.5431039056234193, 'critic_loss': 1977.3607920150312, 'actor_loss': 147.94364164028948, 'time_step': 0.026075543018809537, 'td_error': 1933.8166168483394, 'init_value': -415.1709899902344, 'ave_value': -126.19008387982443} step=7182
2022-04-20 18:17.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.18 [info     ] CQL_20220420181400: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003596781290065475, 'time_algorithm_update': 0.025578523937024568, 'temp_loss': 0.0761248755884188, 'temp': 0.913630842988254, 'alpha_loss': 9.514760471226877, 'alpha': 0.5321313052149544, 'critic_loss': 1945.6131745277091, 'actor_loss': 143.87087680303563, 'time_step': 0.026040570777759217, 'td_error': 1015.971624890188, 'init_value': -403.8075866699219, 'ave_value': -128.86209097443938} step=7524
2022-04-20 18:17.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.28 [info     ] CQL_20220420181400: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035648039209912395, 'time_algorithm_update': 0.025647703667133176, 'temp_loss': 0.049314313726118435, 'temp': 0.9103573022181528, 'alpha_loss': 9.375639871198532, 'alpha': 0.5217923971644619, 'critic_loss': 1893.44621160853, 'actor_loss': 142.24799489696125, 'time_step': 0.026106671980249952, 'td_error': 724.7956738226545, 'init_value': -372.2362060546875, 'ave_value': -123.08039959730195} step=7866
2022-04-20 18:17.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.37 [info     ] CQL_20220420181400: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00036214527330900495, 'time_algorithm_update': 0.02532097540403667, 'temp_loss': 0.04684029494504831, 'temp': 0.9086157048654835, 'alpha_loss': 8.920853179797792, 'alpha': 0.5108799014175147, 'critic_loss': 1836.0408771358736, 'actor_loss': 140.04060533311633, 'time_step': 0.025782811014275802, 'td_error': 675.7341136185016, 'init_value': -357.1087341308594, 'ave_value': -123.54502821231546} step=8208
2022-04-20 18:17.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.46 [info     ] CQL_20220420181400: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000366026895088062, 'time_algorithm_update': 0.025666062594854343, 'temp_loss': -0.06337783949678405, 'temp': 0.9088828403350205, 'alpha_loss': 9.474636991818747, 'alpha': 0.4989934098302272, 'critic_loss': 1768.3182879888523, 'actor_loss': 137.90546435082865, 'time_step': 0.026129141188504405, 'td_error': 434.89470312824966, 'init_value': -340.8251647949219, 'ave_value': -121.98212009201984} step=8550
2022-04-20 18:17.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:17.56 [info     ] CQL_20220420181400: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003639118016114709, 'time_algorithm_update': 0.025499965015210603, 'temp_loss': -0.1236255363130953, 'temp': 0.9134513805484215, 'alpha_loss': 10.673992517398812, 'alpha': 0.48572370026543826, 'critic_loss': 1722.3656030844527, 'actor_loss': 137.3640142965038, 'time_step': 0.025965571403503418, 'td_error': 563.7379070963908, 'init_value': -329.9361572265625, 'ave_value': -121.22136950661034} step=8892
2022-04-20 18:17.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.05 [info     ] CQL_20220420181400: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003641551001030102, 'time_algorithm_update': 0.025695459884509705, 'temp_loss': -0.12710721638293293, 'temp': 0.9196356451302244, 'alpha_loss': 10.24370172706961, 'alpha': 0.4715796410166032, 'critic_loss': 1688.856801172446, 'actor_loss': 137.03503736975597, 'time_step': 0.026160650783114962, 'td_error': 730.8346935583186, 'init_value': -337.9192810058594, 'ave_value': -120.57314021042943} step=9234
2022-04-20 18:18.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.14 [info     ] CQL_20220420181400: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003630382972851134, 'time_algorithm_update': 0.02571607263464677, 'temp_loss': -0.0898526776473076, 'temp': 0.9255712851446275, 'alpha_loss': 10.618450546473788, 'alpha': 0.45754358494839475, 'critic_loss': 1665.8542830260874, 'actor_loss': 136.98767899630363, 'time_step': 0.026180136970609252, 'td_error': 761.8134286388622, 'init_value': -319.1946716308594, 'ave_value': -120.61088339349406} step=9576
2022-04-20 18:18.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.24 [info     ] CQL_20220420181400: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003596007475378918, 'time_algorithm_update': 0.025423846746745863, 'temp_loss': -0.09717611015409405, 'temp': 0.9308270514011383, 'alpha_loss': 9.778830348748214, 'alpha': 0.4438553133554626, 'critic_loss': 1652.7999674479167, 'actor_loss': 137.1362797921164, 'time_step': 0.02588586291374519, 'td_error': 638.553723420129, 'init_value': -307.99554443359375, 'ave_value': -121.1581092553485} step=9918
2022-04-20 18:18.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.33 [info     ] CQL_20220420181400: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003589252282304373, 'time_algorithm_update': 0.024956633473000332, 'temp_loss': -0.05276292559216943, 'temp': 0.9351734532947429, 'alpha_loss': 9.97104785595721, 'alpha': 0.43091737814465464, 'critic_loss': 1640.3987462450887, 'actor_loss': 137.2118847718713, 'time_step': 0.025412995215745, 'td_error': 680.6396897077367, 'init_value': -308.0318908691406, 'ave_value': -123.00450795976965} step=10260
2022-04-20 18:18.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.41 [info     ] CQL_20220420181400: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035561664759764197, 'time_algorithm_update': 0.02382554715139824, 'temp_loss': -0.019988398683088566, 'temp': 0.936409926902481, 'alpha_loss': 9.6933662019975, 'alpha': 0.4181053983537774, 'critic_loss': 1635.8277102464822, 'actor_loss': 137.78306724592957, 'time_step': 0.024280528576053374, 'td_error': 604.3079317090912, 'init_value': -299.8945617675781, 'ave_value': -123.30839244338306} step=10602
2022-04-20 18:18.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.50 [info     ] CQL_20220420181400: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035836961534288194, 'time_algorithm_update': 0.0239982709550021, 'temp_loss': -0.016829519118699763, 'temp': 0.9376920749220932, 'alpha_loss': 9.774548250332213, 'alpha': 0.4055375305184147, 'critic_loss': 1633.3964426141035, 'actor_loss': 138.09761281599077, 'time_step': 0.024452628447995547, 'td_error': 801.4492472889153, 'init_value': -288.21368408203125, 'ave_value': -122.12547750082538} step=10944
2022-04-20 18:18.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:18.59 [info     ] CQL_20220420181400: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003532073651140893, 'time_algorithm_update': 0.02388371919330798, 'temp_loss': -0.029058766153733633, 'temp': 0.9391073300475963, 'alpha_loss': 9.937858273411354, 'alpha': 0.39316417520854907, 'critic_loss': 1633.834648623104, 'actor_loss': 138.3214734618427, 'time_step': 0.024339483495344195, 'td_error': 1042.3913810439412, 'init_value': -279.79547119140625, 'ave_value': -120.07470721236724} step=11286
2022-04-20 18:18.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.08 [info     ] CQL_20220420181400: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035624824769315666, 'time_algorithm_update': 0.024027738654822633, 'temp_loss': -0.1639798719446823, 'temp': 0.9451043501234891, 'alpha_loss': 12.871107766502782, 'alpha': 0.37904872008931567, 'critic_loss': 1650.9333656712581, 'actor_loss': 140.72556659631562, 'time_step': 0.024480744412070828, 'td_error': 573.5356890070182, 'init_value': -287.01287841796875, 'ave_value': -124.86837984232216} step=11628
2022-04-20 18:19.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.16 [info     ] CQL_20220420181400: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035160884522555166, 'time_algorithm_update': 0.02383121342686882, 'temp_loss': -0.04140147940413645, 'temp': 0.9510441600230702, 'alpha_loss': 9.575077123460714, 'alpha': 0.36661759942595723, 'critic_loss': 1675.4503773471765, 'actor_loss': 140.7777748330992, 'time_step': 0.02428323553319563, 'td_error': 369.0939118911142, 'init_value': -276.47662353515625, 'ave_value': -123.60319047306942} step=11970
2022-04-20 18:19.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.25 [info     ] CQL_20220420181400: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035362982610512896, 'time_algorithm_update': 0.02365200421963519, 'temp_loss': 0.0020864672512251733, 'temp': 0.9526274375051086, 'alpha_loss': 7.64572094476711, 'alpha': 0.3570928153587364, 'critic_loss': 1665.797884400128, 'actor_loss': 139.76977518985146, 'time_step': 0.024105974805285358, 'td_error': 574.9432224174053, 'init_value': -263.55859375, 'ave_value': -121.99060218389708} step=12312
2022-04-20 18:19.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.34 [info     ] CQL_20220420181400: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003568561453568308, 'time_algorithm_update': 0.024103449102033648, 'temp_loss': -0.03879024782245271, 'temp': 0.9538695339919531, 'alpha_loss': 7.066894964516512, 'alpha': 0.34856876216785254, 'critic_loss': 1647.8636360391538, 'actor_loss': 139.12819444087515, 'time_step': 0.0245608349292599, 'td_error': 676.34610915913, 'init_value': -262.4252014160156, 'ave_value': -121.32713347384656} step=12654
2022-04-20 18:19.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.42 [info     ] CQL_20220420181400: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00036467515934280486, 'time_algorithm_update': 0.02406467610632467, 'temp_loss': 0.04606949120803534, 'temp': 0.9543390612156071, 'alpha_loss': 7.402532307028073, 'alpha': 0.33986637684685445, 'critic_loss': 1627.656060469778, 'actor_loss': 138.41397293269284, 'time_step': 0.024526342313889175, 'td_error': 486.8066896347882, 'init_value': -245.28250122070312, 'ave_value': -117.0891322757485} step=12996
2022-04-20 18:19.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:19.51 [info     ] CQL_20220420181400: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003538989184195535, 'time_algorithm_update': 0.024048618405883074, 'temp_loss': 0.019635460203328327, 'temp': 0.9513796674229248, 'alpha_loss': 6.95673069288159, 'alpha': 0.33084238673511307, 'critic_loss': 1609.9114140739218, 'actor_loss': 137.60641635649387, 'time_step': 0.024501147325973065, 'td_error': 667.7688182931822, 'init_value': -246.6095733642578, 'ave_value': -121.78171093000955} step=13338
2022-04-20 18:19.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.00 [info     ] CQL_20220420181400: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003596446667498315, 'time_algorithm_update': 0.02432093215964691, 'temp_loss': 0.04479311961048876, 'temp': 0.9490668667338745, 'alpha_loss': 6.8375351624182095, 'alpha': 0.3220930122144041, 'critic_loss': 1592.4584860996893, 'actor_loss': 137.07178048920213, 'time_step': 0.02477939714465225, 'td_error': 416.6974865530847, 'init_value': -235.77529907226562, 'ave_value': -119.44830223057319} step=13680
2022-04-20 18:20.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.09 [info     ] CQL_20220420181400: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003573657476414017, 'time_algorithm_update': 0.0240468560603627, 'temp_loss': -0.019989430806354472, 'temp': 0.9484388547682623, 'alpha_loss': 6.416591582242509, 'alpha': 0.31343770698148604, 'critic_loss': 1567.279879385965, 'actor_loss': 135.81749379565144, 'time_step': 0.024504275349845662, 'td_error': 373.2207025459618, 'init_value': -226.5624542236328, 'ave_value': -117.27406722748803} step=14022
2022-04-20 18:20.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.18 [info     ] CQL_20220420181400: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003632425564771507, 'time_algorithm_update': 0.024226620880483885, 'temp_loss': -0.02402898108815415, 'temp': 0.9507079671698007, 'alpha_loss': 5.877229355929191, 'alpha': 0.30529709230040947, 'critic_loss': 1522.1925116644966, 'actor_loss': 133.54058217723468, 'time_step': 0.02468920730010808, 'td_error': 638.3721100490544, 'init_value': -223.9407196044922, 'ave_value': -119.1764066103} step=14364
2022-04-20 18:20.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.27 [info     ] CQL_20220420181400: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003637214850263986, 'time_algorithm_update': 0.024971667088960346, 'temp_loss': 0.01227032318057721, 'temp': 0.9504585032574615, 'alpha_loss': 5.785332821963126, 'alpha': 0.2971728675023854, 'critic_loss': 1492.9614946688826, 'actor_loss': 132.43908894410606, 'time_step': 0.025436775029054164, 'td_error': 301.5315639451801, 'init_value': -218.80441284179688, 'ave_value': -118.03860546776168} step=14706
2022-04-20 18:20.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.36 [info     ] CQL_20220420181400: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003733418838322511, 'time_algorithm_update': 0.02540841437222665, 'temp_loss': 0.021406737820664695, 'temp': 0.9489177156958664, 'alpha_loss': 5.567919470413386, 'alpha': 0.2893930029625084, 'critic_loss': 1461.4281566240634, 'actor_loss': 131.0121911701403, 'time_step': 0.025888210151627747, 'td_error': 533.1299070404224, 'init_value': -203.4578399658203, 'ave_value': -113.38601273261783} step=15048
2022-04-20 18:20.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.45 [info     ] CQL_20220420181400: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00036294279042740316, 'time_algorithm_update': 0.025267177855062207, 'temp_loss': 0.028308973473371468, 'temp': 0.9482258092938808, 'alpha_loss': 5.923915323545361, 'alpha': 0.2809930905612589, 'critic_loss': 1448.7852718955592, 'actor_loss': 130.8482917874877, 'time_step': 0.02573261274928935, 'td_error': 532.2834786483608, 'init_value': -200.2907257080078, 'ave_value': -112.12727288883414} step=15390
2022-04-20 18:20.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:20.54 [info     ] CQL_20220420181400: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003717621864631162, 'time_algorithm_update': 0.025116794290598373, 'temp_loss': 0.038949067083497844, 'temp': 0.9452800546821795, 'alpha_loss': 5.526510838876691, 'alpha': 0.2729877449615657, 'critic_loss': 1439.1452326189008, 'actor_loss': 130.15725520619173, 'time_step': 0.0255899659374304, 'td_error': 678.2486496013967, 'init_value': -202.9537353515625, 'ave_value': -113.64518212126183} step=15732
2022-04-20 18:20.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.04 [info     ] CQL_20220420181400: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00036969031506811666, 'time_algorithm_update': 0.02530750196579604, 'temp_loss': 0.023505961092199847, 'temp': 0.9440511456358502, 'alpha_loss': 5.051830106311375, 'alpha': 0.26526543898889193, 'critic_loss': 1421.0023935775312, 'actor_loss': 129.16099568417198, 'time_step': 0.025778472074988294, 'td_error': 200.32917687339582, 'init_value': -200.38552856445312, 'ave_value': -114.26287460793917} step=16074
2022-04-20 18:21.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.13 [info     ] CQL_20220420181400: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003699412819934867, 'time_algorithm_update': 0.02613699505900779, 'temp_loss': 0.03423819362801331, 'temp': 0.9416061041996493, 'alpha_loss': 4.338775174659595, 'alpha': 0.2586181487082041, 'critic_loss': 1383.3986623663652, 'actor_loss': 127.48744119119922, 'time_step': 0.02661254670884874, 'td_error': 168.90986456218505, 'init_value': -193.86582946777344, 'ave_value': -110.66795139103523} step=16416
2022-04-20 18:21.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.23 [info     ] CQL_20220420181400: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003734889783357319, 'time_algorithm_update': 0.026269727283053927, 'temp_loss': 0.021310319895284216, 'temp': 0.941449265033878, 'alpha_loss': 3.83150451434286, 'alpha': 0.25236569188143076, 'critic_loss': 1345.4291396113167, 'actor_loss': 125.85757127282216, 'time_step': 0.026742265238399395, 'td_error': 100.89081875141773, 'init_value': -188.91336059570312, 'ave_value': -111.65914493570155} step=16758
2022-04-20 18:21.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:21.32 [info     ] CQL_20220420181400: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00036259422525327804, 'time_algorithm_update': 0.02614687409317284, 'temp_loss': -0.001339594457756009, 'temp': 0.939692303102616, 'alpha_loss': 3.6170704120780988, 'alpha': 0.24643925132981517, 'critic_loss': 1314.8420784933526, 'actor_loss': 124.58570884124578, 'time_step': 0.026610683976558216, 'td_error': 76.72713767417933, 'init_value': -179.61077880859375, 'ave_value': -106.24142779268895} step=17100
2022-04-20 18:21.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420181400/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:21.33 [info     ] FQE_20220420182133: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015781873680022825, 'time_algorithm_update': 0.004595146121748959, 'loss': 0.00833436873101865, 'time_step': 0.004829942461955978, 'init_value': -0.35077226161956787, 'ave_value': -0.28908392860982063, 'soft_opc': nan} step=166




2022-04-20 18:21.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.34 [info     ] FQE_20220420182133: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015658642872270332, 'time_algorithm_update': 0.0036560569901064217, 'loss': 0.0063015427230015755, 'time_step': 0.0038830199873591043, 'init_value': -0.5079942941665649, 'ave_value': -0.39004339745251443, 'soft_opc': nan} step=332




2022-04-20 18:21.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.35 [info     ] FQE_20220420182133: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015505681554955173, 'time_algorithm_update': 0.00457165040165545, 'loss': 0.00611396324231727, 'time_step': 0.004792990454708238, 'init_value': -0.5848913192749023, 'ave_value': -0.43984252062064033, 'soft_opc': nan} step=498




2022-04-20 18:21.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.36 [info     ] FQE_20220420182133: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015674728945077183, 'time_algorithm_update': 0.004677426384156008, 'loss': 0.006382879331225462, 'time_step': 0.004903039300298116, 'init_value': -0.6618578433990479, 'ave_value': -0.45050746299125055, 'soft_opc': nan} step=664




2022-04-20 18:21.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.37 [info     ] FQE_20220420182133: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015826254005891732, 'time_algorithm_update': 0.00422208424074104, 'loss': 0.00647992576915681, 'time_step': 0.004455297826284386, 'init_value': -0.7760939598083496, 'ave_value': -0.5117043226017608, 'soft_opc': nan} step=830




2022-04-20 18:21.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.38 [info     ] FQE_20220420182133: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015815482082137143, 'time_algorithm_update': 0.004460455423378083, 'loss': 0.006551376269030643, 'time_step': 0.004689572805381683, 'init_value': -0.8244643211364746, 'ave_value': -0.5319763454768035, 'soft_opc': nan} step=996




2022-04-20 18:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.38 [info     ] FQE_20220420182133: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016552999795201313, 'time_algorithm_update': 0.00466343868209655, 'loss': 0.0066807555506028325, 'time_step': 0.0049012999936758755, 'init_value': -0.9175467491149902, 'ave_value': -0.5703278966550086, 'soft_opc': nan} step=1162




2022-04-20 18:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.39 [info     ] FQE_20220420182133: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001589289630752012, 'time_algorithm_update': 0.004630469414125006, 'loss': 0.006464558896182262, 'time_step': 0.004866712064628142, 'init_value': -1.0071790218353271, 'ave_value': -0.5706672551383072, 'soft_opc': nan} step=1328




2022-04-20 18:21.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.40 [info     ] FQE_20220420182133: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015886002276317183, 'time_algorithm_update': 0.003826070980853345, 'loss': 0.006004018549167787, 'time_step': 0.004052796995783427, 'init_value': -1.075162649154663, 'ave_value': -0.5865191466175019, 'soft_opc': nan} step=1494




2022-04-20 18:21.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.41 [info     ] FQE_20220420182133: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015881980758115468, 'time_algorithm_update': 0.004567676279918257, 'loss': 0.006165851728081793, 'time_step': 0.004797925432044339, 'init_value': -1.1887034177780151, 'ave_value': -0.631296673490926, 'soft_opc': nan} step=1660




2022-04-20 18:21.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.42 [info     ] FQE_20220420182133: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015721981783947312, 'time_algorithm_update': 0.004627042506114546, 'loss': 0.005936265674823648, 'time_step': 0.00485527659037027, 'init_value': -1.2803398370742798, 'ave_value': -0.6516107450247751, 'soft_opc': nan} step=1826




2022-04-20 18:21.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.43 [info     ] FQE_20220420182133: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001567774508372847, 'time_algorithm_update': 0.00415217445557376, 'loss': 0.005858709332982011, 'time_step': 0.004381205662187323, 'init_value': -1.3368786573410034, 'ave_value': -0.671334148248708, 'soft_opc': nan} step=1992




2022-04-20 18:21.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.43 [info     ] FQE_20220420182133: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015847797853400908, 'time_algorithm_update': 0.004298039229519396, 'loss': 0.005994222073602569, 'time_step': 0.004536160503525332, 'init_value': -1.4357869625091553, 'ave_value': -0.6873926739669867, 'soft_opc': nan} step=2158




2022-04-20 18:21.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.44 [info     ] FQE_20220420182133: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001641770443284368, 'time_algorithm_update': 0.004661587347467262, 'loss': 0.005849836263749524, 'time_step': 0.004896751369338438, 'init_value': -1.4966503381729126, 'ave_value': -0.6880572095077049, 'soft_opc': nan} step=2324




2022-04-20 18:21.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.45 [info     ] FQE_20220420182133: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015749270657458938, 'time_algorithm_update': 0.004592697304415415, 'loss': 0.00580107244937849, 'time_step': 0.004824988813285368, 'init_value': -1.5688164234161377, 'ave_value': -0.7158389838529868, 'soft_opc': nan} step=2490




2022-04-20 18:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.46 [info     ] FQE_20220420182133: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001581174781523555, 'time_algorithm_update': 0.003768883555768484, 'loss': 0.0061158219686181965, 'time_step': 0.003998283880302705, 'init_value': -1.7467103004455566, 'ave_value': -0.8130141000355686, 'soft_opc': nan} step=2656




2022-04-20 18:21.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.47 [info     ] FQE_20220420182133: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016400038477886155, 'time_algorithm_update': 0.00453554578574307, 'loss': 0.006245450489784041, 'time_step': 0.004778682467449142, 'init_value': -1.8318712711334229, 'ave_value': -0.8328173068628923, 'soft_opc': nan} step=2822




2022-04-20 18:21.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.48 [info     ] FQE_20220420182133: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016040830727083138, 'time_algorithm_update': 0.004596221877867917, 'loss': 0.006329223590044597, 'time_step': 0.004828320928366788, 'init_value': -1.8827303647994995, 'ave_value': -0.8730212536615294, 'soft_opc': nan} step=2988




2022-04-20 18:21.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.49 [info     ] FQE_20220420182133: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016019861382174204, 'time_algorithm_update': 0.004240430981279856, 'loss': 0.006936487309769334, 'time_step': 0.004472373479820159, 'init_value': -1.9946517944335938, 'ave_value': -0.9067724184409992, 'soft_opc': nan} step=3154




2022-04-20 18:21.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.49 [info     ] FQE_20220420182133: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001599013087261154, 'time_algorithm_update': 0.00421980633793107, 'loss': 0.006978251485305513, 'time_step': 0.004455373947878918, 'init_value': -2.1336429119110107, 'ave_value': -0.9972004420719706, 'soft_opc': nan} step=3320




2022-04-20 18:21.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.50 [info     ] FQE_20220420182133: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001606668334409415, 'time_algorithm_update': 0.004620628184582813, 'loss': 0.007619339996578553, 'time_step': 0.004852286304335996, 'init_value': -2.1464786529541016, 'ave_value': -0.9740144871074605, 'soft_opc': nan} step=3486




2022-04-20 18:21.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.51 [info     ] FQE_20220420182133: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016601976141872177, 'time_algorithm_update': 0.004645748310778515, 'loss': 0.008033846718180611, 'time_step': 0.004884402435946177, 'init_value': -2.2657015323638916, 'ave_value': -1.0479110381221985, 'soft_opc': nan} step=3652




2022-04-20 18:21.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.52 [info     ] FQE_20220420182133: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015792501978127352, 'time_algorithm_update': 0.003829546721584826, 'loss': 0.008348068918791566, 'time_step': 0.004056679197104581, 'init_value': -2.3890042304992676, 'ave_value': -1.1131521239876747, 'soft_opc': nan} step=3818




2022-04-20 18:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.53 [info     ] FQE_20220420182133: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016850592142128083, 'time_algorithm_update': 0.00465127933456237, 'loss': 0.008780415594179452, 'time_step': 0.004894780825419599, 'init_value': -2.454413414001465, 'ave_value': -1.1509490814630512, 'soft_opc': nan} step=3984




2022-04-20 18:21.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.54 [info     ] FQE_20220420182133: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001655501055430217, 'time_algorithm_update': 0.004624180046908827, 'loss': 0.009356201711217368, 'time_step': 0.004860333649508925, 'init_value': -2.5478339195251465, 'ave_value': -1.2288317733855398, 'soft_opc': nan} step=4150




2022-04-20 18:21.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.54 [info     ] FQE_20220420182133: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016000041042465762, 'time_algorithm_update': 0.004167604159159833, 'loss': 0.009706794318647674, 'time_step': 0.004398765334163804, 'init_value': -2.6034798622131348, 'ave_value': -1.2351015075235754, 'soft_opc': nan} step=4316




2022-04-20 18:21.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.55 [info     ] FQE_20220420182133: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001637519124042557, 'time_algorithm_update': 0.004268802792192942, 'loss': 0.00964904801796628, 'time_step': 0.004508334470082478, 'init_value': -2.693789482116699, 'ave_value': -1.2990963044064539, 'soft_opc': nan} step=4482




2022-04-20 18:21.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.56 [info     ] FQE_20220420182133: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015980220702757318, 'time_algorithm_update': 0.004590367696371423, 'loss': 0.010011212191269565, 'time_step': 0.004821347903056317, 'init_value': -2.7924232482910156, 'ave_value': -1.3849460004082135, 'soft_opc': nan} step=4648




2022-04-20 18:21.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.57 [info     ] FQE_20220420182133: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016678241362054664, 'time_algorithm_update': 0.004600692944354321, 'loss': 0.01044603098388649, 'time_step': 0.004839907209557223, 'init_value': -2.887901782989502, 'ave_value': -1.4247535354785017, 'soft_opc': nan} step=4814




2022-04-20 18:21.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.58 [info     ] FQE_20220420182133: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016123415475868317, 'time_algorithm_update': 0.003809002508600074, 'loss': 0.01068698804045702, 'time_step': 0.00404209975736687, 'init_value': -2.9268500804901123, 'ave_value': -1.454389751038036, 'soft_opc': nan} step=4980




2022-04-20 18:21.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:21.59 [info     ] FQE_20220420182133: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016550845410450395, 'time_algorithm_update': 0.0047131144856832115, 'loss': 0.01096739747466138, 'time_step': 0.004951936652861446, 'init_value': -3.0347533226013184, 'ave_value': -1.5300199948519737, 'soft_opc': nan} step=5146




2022-04-20 18:21.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.00 [info     ] FQE_20220420182133: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001605921481029097, 'time_algorithm_update': 0.004666299705045769, 'loss': 0.011329295247630497, 'time_step': 0.00489978474306773, 'init_value': -3.1096017360687256, 'ave_value': -1.5774208688856781, 'soft_opc': nan} step=5312




2022-04-20 18:22.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.00 [info     ] FQE_20220420182133: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016012967350971267, 'time_algorithm_update': 0.00430875370301396, 'loss': 0.01146727978989639, 'time_step': 0.004542530301105545, 'init_value': -3.139514207839966, 'ave_value': -1.5696367720695765, 'soft_opc': nan} step=5478




2022-04-20 18:22.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.01 [info     ] FQE_20220420182133: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015952931829245695, 'time_algorithm_update': 0.00412858107003821, 'loss': 0.011653160104101681, 'time_step': 0.0043561645300991565, 'init_value': -3.2229318618774414, 'ave_value': -1.5976208052858039, 'soft_opc': nan} step=5644




2022-04-20 18:22.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.02 [info     ] FQE_20220420182133: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001595379358314606, 'time_algorithm_update': 0.004614005605858493, 'loss': 0.012025557336375028, 'time_step': 0.004844889583357845, 'init_value': -3.2767128944396973, 'ave_value': -1.6180242687866495, 'soft_opc': nan} step=5810




2022-04-20 18:22.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.03 [info     ] FQE_20220420182133: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016133756522672722, 'time_algorithm_update': 0.004567318652049604, 'loss': 0.012364267321101513, 'time_step': 0.004801234567021749, 'init_value': -3.2902822494506836, 'ave_value': -1.5977740525528117, 'soft_opc': nan} step=5976




2022-04-20 18:22.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.04 [info     ] FQE_20220420182133: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015948048557143613, 'time_algorithm_update': 0.0038072445306433253, 'loss': 0.012714610782911978, 'time_step': 0.004033878625157368, 'init_value': -3.3248448371887207, 'ave_value': -1.628441465491647, 'soft_opc': nan} step=6142




2022-04-20 18:22.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.05 [info     ] FQE_20220420182133: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001614811908767884, 'time_algorithm_update': 0.004666790904768978, 'loss': 0.012698587079964444, 'time_step': 0.004900001617799322, 'init_value': -3.3319954872131348, 'ave_value': -1.6351395219564437, 'soft_opc': nan} step=6308




2022-04-20 18:22.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.05 [info     ] FQE_20220420182133: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001583602055009589, 'time_algorithm_update': 0.004687836371272443, 'loss': 0.01335231197229306, 'time_step': 0.004914200449564371, 'init_value': -3.4715735912323, 'ave_value': -1.7248384276086146, 'soft_opc': nan} step=6474




2022-04-20 18:22.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.06 [info     ] FQE_20220420182133: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015669271170374858, 'time_algorithm_update': 0.0022055657513170355, 'loss': 0.013235153269207845, 'time_step': 0.0024280806621873236, 'init_value': -3.4708428382873535, 'ave_value': -1.7244401191470322, 'soft_opc': nan} step=6640




2022-04-20 18:22.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.07 [info     ] FQE_20220420182133: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001593081347913627, 'time_algorithm_update': 0.003961420920958002, 'loss': 0.013763630851761001, 'time_step': 0.004190531121679099, 'init_value': -3.5589771270751953, 'ave_value': -1.8080826731146993, 'soft_opc': nan} step=6806




2022-04-20 18:22.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.07 [info     ] FQE_20220420182133: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016038245465382035, 'time_algorithm_update': 0.0038345578205154604, 'loss': 0.014019014576803443, 'time_step': 0.004066980028726968, 'init_value': -3.526310682296753, 'ave_value': -1.723598905867553, 'soft_opc': nan} step=6972




2022-04-20 18:22.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.08 [info     ] FQE_20220420182133: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001645303634275873, 'time_algorithm_update': 0.00463165288948151, 'loss': 0.014652732672051987, 'time_step': 0.004867453172982457, 'init_value': -3.659184455871582, 'ave_value': -1.837993314540064, 'soft_opc': nan} step=7138




2022-04-20 18:22.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.09 [info     ] FQE_20220420182133: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016484921237072312, 'time_algorithm_update': 0.004633349108408733, 'loss': 0.014740358164999634, 'time_step': 0.004874964794480657, 'init_value': -3.7315008640289307, 'ave_value': -1.9172112214269939, 'soft_opc': nan} step=7304




2022-04-20 18:22.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.10 [info     ] FQE_20220420182133: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001561928944415357, 'time_algorithm_update': 0.003900540880410068, 'loss': 0.015230181128353285, 'time_step': 0.004127271204109651, 'init_value': -3.73799467086792, 'ave_value': -1.901907404971955, 'soft_opc': nan} step=7470




2022-04-20 18:22.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.11 [info     ] FQE_20220420182133: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016705242984266165, 'time_algorithm_update': 0.004523412290825902, 'loss': 0.015388484227931104, 'time_step': 0.004764019724834396, 'init_value': -3.7169623374938965, 'ave_value': -1.8836049208434316, 'soft_opc': nan} step=7636




2022-04-20 18:22.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.12 [info     ] FQE_20220420182133: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016295335378991552, 'time_algorithm_update': 0.004689124693353492, 'loss': 0.01582237402417596, 'time_step': 0.0049258197646543204, 'init_value': -3.8515148162841797, 'ave_value': -2.024918970331415, 'soft_opc': nan} step=7802




2022-04-20 18:22.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.13 [info     ] FQE_20220420182133: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001630467104624553, 'time_algorithm_update': 0.004668064864285021, 'loss': 0.016630309931260634, 'time_step': 0.004905127617250006, 'init_value': -3.8781075477600098, 'ave_value': -1.999417149523894, 'soft_opc': nan} step=7968




2022-04-20 18:22.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.13 [info     ] FQE_20220420182133: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015809593430484635, 'time_algorithm_update': 0.0038452004811849938, 'loss': 0.017079886316226412, 'time_step': 0.004074243177850562, 'init_value': -3.9783146381378174, 'ave_value': -2.0941974148288502, 'soft_opc': nan} step=8134




2022-04-20 18:22.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:22.14 [info     ] FQE_20220420182133: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015813614948686347, 'time_algorithm_update': 0.004628546266670686, 'loss': 0.017289764565522277, 'time_step': 0.004856974245553993, 'init_value': -3.96221923828125, 'ave_value': -2.063900808365764, 'soft_opc': nan} step=8300




2022-04-20 18:22.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182133/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 18:22.14 [info     ] Directory is created at d3rlpy_logs/FQE_20220420182214
2022-04-20 18:22.15 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:22.15 [debug    ] Building models...
2022-04-20 18:22.15 [debug    ] Models have been built.
2022-04-20 18:22.15 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420182214/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:22.16 [info     ] FQE_20220420182214: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016313514044118482, 'time_algorithm_update': 0.004089061603989712, 'loss': 0.02400937381871911, 'time_step': 0.0043229190416114276, 'init_value': -0.9746185541152954, 'ave_value': -0.959200939032677, 'soft_opc': nan} step=344




2022-04-20 18:22.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.18 [info     ] FQE_20220420182214: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001681849013927371, 'time_algorithm_update': 0.004619436208591904, 'loss': 0.02225024439394474, 'time_step': 0.004861647999563882, 'init_value': -1.668870210647583, 'ave_value': -1.6181489862568743, 'soft_opc': nan} step=688




2022-04-20 18:22.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.20 [info     ] FQE_20220420182214: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001636868299439896, 'time_algorithm_update': 0.004220383804897929, 'loss': 0.028118038116845975, 'time_step': 0.0044582014860108845, 'init_value': -2.6028404235839844, 'ave_value': -2.524001544056175, 'soft_opc': nan} step=1032




2022-04-20 18:22.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.21 [info     ] FQE_20220420182214: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016536684923393783, 'time_algorithm_update': 0.004600192918333896, 'loss': 0.03065668565199472, 'time_step': 0.004842009655264921, 'init_value': -3.1681861877441406, 'ave_value': -3.070446121229513, 'soft_opc': nan} step=1376




2022-04-20 18:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.23 [info     ] FQE_20220420182214: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016771707423897676, 'time_algorithm_update': 0.004203528858894526, 'loss': 0.039626252596526475, 'time_step': 0.0044434015140977015, 'init_value': -3.996582508087158, 'ave_value': -3.9354498790660957, 'soft_opc': nan} step=1720




2022-04-20 18:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.25 [info     ] FQE_20220420182214: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016341583673344103, 'time_algorithm_update': 0.0043090408624604695, 'loss': 0.04761114481533336, 'time_step': 0.004545953384665556, 'init_value': -4.536805152893066, 'ave_value': -4.520568798891865, 'soft_opc': nan} step=2064




2022-04-20 18:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.27 [info     ] FQE_20220420182214: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001650099144425503, 'time_algorithm_update': 0.0045953178128530815, 'loss': 0.05988330421373681, 'time_step': 0.004836681970330172, 'init_value': -5.379227638244629, 'ave_value': -5.431585669104715, 'soft_opc': nan} step=2408




2022-04-20 18:22.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.28 [info     ] FQE_20220420182214: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001649731813475143, 'time_algorithm_update': 0.004191736842310706, 'loss': 0.07297908045557262, 'time_step': 0.004430233739143194, 'init_value': -5.780407905578613, 'ave_value': -5.9274680996733204, 'soft_opc': nan} step=2752




2022-04-20 18:22.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.30 [info     ] FQE_20220420182214: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016809965288916299, 'time_algorithm_update': 0.004599213600158691, 'loss': 0.08502043213771093, 'time_step': 0.004845601181651271, 'init_value': -6.300980091094971, 'ave_value': -6.489776751172502, 'soft_opc': nan} step=3096




2022-04-20 18:22.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.32 [info     ] FQE_20220420182214: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016479298125865848, 'time_algorithm_update': 0.004217234461806541, 'loss': 0.09829307570349598, 'time_step': 0.004456048094949057, 'init_value': -6.914388656616211, 'ave_value': -7.175758783521365, 'soft_opc': nan} step=3440




2022-04-20 18:22.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.33 [info     ] FQE_20220420182214: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001641699047975762, 'time_algorithm_update': 0.004607938056768373, 'loss': 0.11312491116168108, 'time_step': 0.004847287438636602, 'init_value': -7.385030746459961, 'ave_value': -7.677098523724723, 'soft_opc': nan} step=3784




2022-04-20 18:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.35 [info     ] FQE_20220420182214: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016644804976707282, 'time_algorithm_update': 0.004269571498382923, 'loss': 0.12677794678927248, 'time_step': 0.004510271687840306, 'init_value': -7.927953720092773, 'ave_value': -8.345493023350482, 'soft_opc': nan} step=4128




2022-04-20 18:22.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.37 [info     ] FQE_20220420182214: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016173858975255213, 'time_algorithm_update': 0.004223513741825902, 'loss': 0.14288252596418524, 'time_step': 0.004458333170691202, 'init_value': -8.283681869506836, 'ave_value': -8.820462371989548, 'soft_opc': nan} step=4472




2022-04-20 18:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.38 [info     ] FQE_20220420182214: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016553318777749705, 'time_algorithm_update': 0.004593643338181252, 'loss': 0.153858560359452, 'time_step': 0.004831499831621037, 'init_value': -8.86425495147705, 'ave_value': -9.490179251929796, 'soft_opc': nan} step=4816




2022-04-20 18:22.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.40 [info     ] FQE_20220420182214: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016381297000618868, 'time_algorithm_update': 0.004099958857824636, 'loss': 0.16712569904535315, 'time_step': 0.004342257976531982, 'init_value': -9.075188636779785, 'ave_value': -9.795715394455872, 'soft_opc': nan} step=5160




2022-04-20 18:22.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.42 [info     ] FQE_20220420182214: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016860144082890002, 'time_algorithm_update': 0.00461266415063725, 'loss': 0.17885388858443083, 'time_step': 0.004856521307035934, 'init_value': -9.653560638427734, 'ave_value': -10.492199141220903, 'soft_opc': nan} step=5504




2022-04-20 18:22.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.44 [info     ] FQE_20220420182214: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016386079233746196, 'time_algorithm_update': 0.004111219977223596, 'loss': 0.19313556960307407, 'time_step': 0.0043488422105478685, 'init_value': -9.944427490234375, 'ave_value': -10.853795787607329, 'soft_opc': nan} step=5848




2022-04-20 18:22.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.45 [info     ] FQE_20220420182214: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016662270523781, 'time_algorithm_update': 0.004610795614331267, 'loss': 0.20393932655691926, 'time_step': 0.004850208759307861, 'init_value': -10.273545265197754, 'ave_value': -11.269417344012323, 'soft_opc': nan} step=6192




2022-04-20 18:22.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.47 [info     ] FQE_20220420182214: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016268117483272108, 'time_algorithm_update': 0.004292479088140088, 'loss': 0.218241490786462, 'time_step': 0.004529131706370864, 'init_value': -10.623300552368164, 'ave_value': -11.77955954792775, 'soft_opc': nan} step=6536




2022-04-20 18:22.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.49 [info     ] FQE_20220420182214: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016773509424786236, 'time_algorithm_update': 0.00429342929707017, 'loss': 0.22815141176566656, 'time_step': 0.004536636346994445, 'init_value': -10.916646957397461, 'ave_value': -12.098110613870846, 'soft_opc': nan} step=6880




2022-04-20 18:22.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.50 [info     ] FQE_20220420182214: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017206959946210995, 'time_algorithm_update': 0.004561621089314305, 'loss': 0.24146008577609304, 'time_step': 0.004810423351997553, 'init_value': -11.467931747436523, 'ave_value': -12.84241722896779, 'soft_opc': nan} step=7224




2022-04-20 18:22.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.52 [info     ] FQE_20220420182214: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016725409862607024, 'time_algorithm_update': 0.0042001112949016485, 'loss': 0.2567550243084254, 'time_step': 0.004444707271664641, 'init_value': -11.44011116027832, 'ave_value': -12.880577064186403, 'soft_opc': nan} step=7568




2022-04-20 18:22.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.54 [info     ] FQE_20220420182214: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001705524533293968, 'time_algorithm_update': 0.004654060962588288, 'loss': 0.27231251680682045, 'time_step': 0.00490229420883711, 'init_value': -12.032350540161133, 'ave_value': -13.56327940721006, 'soft_opc': nan} step=7912




2022-04-20 18:22.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.56 [info     ] FQE_20220420182214: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001694178858468699, 'time_algorithm_update': 0.004183523876722469, 'loss': 0.2848763180961616, 'time_step': 0.004430253838383874, 'init_value': -12.032464981079102, 'ave_value': -13.674140218798877, 'soft_opc': nan} step=8256




2022-04-20 18:22.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.57 [info     ] FQE_20220420182214: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001654971477597259, 'time_algorithm_update': 0.004604968220688576, 'loss': 0.3004747056889586, 'time_step': 0.004844434039537297, 'init_value': -12.426722526550293, 'ave_value': -14.171691542750576, 'soft_opc': nan} step=8600




2022-04-20 18:22.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:22.59 [info     ] FQE_20220420182214: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016845104306243187, 'time_algorithm_update': 0.0043317821136740755, 'loss': 0.3183695626293504, 'time_step': 0.004576938096867051, 'init_value': -12.556419372558594, 'ave_value': -14.46673410283905, 'soft_opc': nan} step=8944




2022-04-20 18:22.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.01 [info     ] FQE_20220420182214: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016641062359477199, 'time_algorithm_update': 0.004244664380716723, 'loss': 0.3333843884134102, 'time_step': 0.004487521426622258, 'init_value': -13.017247200012207, 'ave_value': -14.978610493780979, 'soft_opc': nan} step=9288




2022-04-20 18:23.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.03 [info     ] FQE_20220420182214: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001671494439590809, 'time_algorithm_update': 0.004583456488542779, 'loss': 0.35683212118483215, 'time_step': 0.004824923914532328, 'init_value': -13.117447853088379, 'ave_value': -15.236845270261538, 'soft_opc': nan} step=9632




2022-04-20 18:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.04 [info     ] FQE_20220420182214: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016727558402127997, 'time_algorithm_update': 0.0041928935882657075, 'loss': 0.375756153858505, 'time_step': 0.004435968953509664, 'init_value': -13.256061553955078, 'ave_value': -15.46553538519352, 'soft_opc': nan} step=9976




2022-04-20 18:23.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.06 [info     ] FQE_20220420182214: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016668785450070403, 'time_algorithm_update': 0.004608028156812801, 'loss': 0.3899660988678333, 'time_step': 0.004852965127590091, 'init_value': -13.136796951293945, 'ave_value': -15.447787095529556, 'soft_opc': nan} step=10320




2022-04-20 18:23.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.08 [info     ] FQE_20220420182214: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016506605370100155, 'time_algorithm_update': 0.004149761310843534, 'loss': 0.4016861286404174, 'time_step': 0.004391731217850086, 'init_value': -13.600831031799316, 'ave_value': -16.03631243617232, 'soft_opc': nan} step=10664




2022-04-20 18:23.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.09 [info     ] FQE_20220420182214: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016901867334232775, 'time_algorithm_update': 0.004601253326549087, 'loss': 0.40991413460091447, 'time_step': 0.0048456656378368995, 'init_value': -13.758822441101074, 'ave_value': -16.350717151026988, 'soft_opc': nan} step=11008




2022-04-20 18:23.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.11 [info     ] FQE_20220420182214: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016710231470507244, 'time_algorithm_update': 0.004396295824716258, 'loss': 0.4151779689858559, 'time_step': 0.004638091769329337, 'init_value': -13.796236038208008, 'ave_value': -16.50014602282076, 'soft_opc': nan} step=11352




2022-04-20 18:23.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.13 [info     ] FQE_20220420182214: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001664889413256978, 'time_algorithm_update': 0.004177533609922542, 'loss': 0.4272414116881961, 'time_step': 0.004421688096467839, 'init_value': -13.852107048034668, 'ave_value': -16.597789535087088, 'soft_opc': nan} step=11696




2022-04-20 18:23.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.15 [info     ] FQE_20220420182214: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016632606816846272, 'time_algorithm_update': 0.004608138356097909, 'loss': 0.41717953700030785, 'time_step': 0.0048515311507291575, 'init_value': -14.42445182800293, 'ave_value': -17.345590924498467, 'soft_opc': nan} step=12040




2022-04-20 18:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.16 [info     ] FQE_20220420182214: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016466337581013524, 'time_algorithm_update': 0.0041086299474849255, 'loss': 0.4528509573902675, 'time_step': 0.0043483175510583925, 'init_value': -14.545960426330566, 'ave_value': -17.493893966411015, 'soft_opc': nan} step=12384




2022-04-20 18:23.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.18 [info     ] FQE_20220420182214: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001660980457483336, 'time_algorithm_update': 0.00465754228968953, 'loss': 0.46066926491750015, 'time_step': 0.004900509534880172, 'init_value': -14.834875106811523, 'ave_value': -17.933305638485276, 'soft_opc': nan} step=12728




2022-04-20 18:23.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.20 [info     ] FQE_20220420182214: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016558308934056483, 'time_algorithm_update': 0.004210948944091797, 'loss': 0.46340134433947155, 'time_step': 0.004453231428944787, 'init_value': -14.88341999053955, 'ave_value': -18.17789377117622, 'soft_opc': nan} step=13072




2022-04-20 18:23.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.21 [info     ] FQE_20220420182214: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016850094462549962, 'time_algorithm_update': 0.004579705554385518, 'loss': 0.4640106409354958, 'time_step': 0.004822882108910139, 'init_value': -15.20107650756836, 'ave_value': -18.578593812101346, 'soft_opc': nan} step=13416




2022-04-20 18:23.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.23 [info     ] FQE_20220420182214: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016730469326640284, 'time_algorithm_update': 0.004452881424926048, 'loss': 0.4622026621743084, 'time_step': 0.004695914512456849, 'init_value': -15.351409912109375, 'ave_value': -18.76060673670659, 'soft_opc': nan} step=13760




2022-04-20 18:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.25 [info     ] FQE_20220420182214: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016699003618816997, 'time_algorithm_update': 0.0042073962300322775, 'loss': 0.471090319284866, 'time_step': 0.004448787417522696, 'init_value': -15.613801002502441, 'ave_value': -19.208297392554364, 'soft_opc': nan} step=14104




2022-04-20 18:23.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.27 [info     ] FQE_20220420182214: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016907342644624932, 'time_algorithm_update': 0.004655976628148278, 'loss': 0.46727033158720926, 'time_step': 0.004901880441710006, 'init_value': -15.609848022460938, 'ave_value': -19.250729248818715, 'soft_opc': nan} step=14448




2022-04-20 18:23.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.28 [info     ] FQE_20220420182214: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016898194024729174, 'time_algorithm_update': 0.004222488680551219, 'loss': 0.46492965312317275, 'time_step': 0.004467591296794803, 'init_value': -15.505205154418945, 'ave_value': -19.25777605827143, 'soft_opc': nan} step=14792




2022-04-20 18:23.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.30 [info     ] FQE_20220420182214: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016416505325672238, 'time_algorithm_update': 0.00461021481558334, 'loss': 0.4668678673428245, 'time_step': 0.004849612712860107, 'init_value': -15.458106994628906, 'ave_value': -19.3198684069521, 'soft_opc': nan} step=15136




2022-04-20 18:23.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.32 [info     ] FQE_20220420182214: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016800400822661644, 'time_algorithm_update': 0.004221873227940049, 'loss': 0.4809858544491405, 'time_step': 0.004466221083042233, 'init_value': -15.683164596557617, 'ave_value': -19.619128412309497, 'soft_opc': nan} step=15480




2022-04-20 18:23.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.34 [info     ] FQE_20220420182214: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016677864762239679, 'time_algorithm_update': 0.004501220098761625, 'loss': 0.48148077390654836, 'time_step': 0.00474346585051958, 'init_value': -15.798100471496582, 'ave_value': -19.838147812784726, 'soft_opc': nan} step=15824




2022-04-20 18:23.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.35 [info     ] FQE_20220420182214: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016673636990924214, 'time_algorithm_update': 0.004403313232022662, 'loss': 0.48253874925268425, 'time_step': 0.004645991463993871, 'init_value': -15.85902214050293, 'ave_value': -19.95150206179551, 'soft_opc': nan} step=16168




2022-04-20 18:23.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.37 [info     ] FQE_20220420182214: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017018304314724234, 'time_algorithm_update': 0.004051014434459598, 'loss': 0.4833999480294107, 'time_step': 0.004296313191569129, 'init_value': -15.902092933654785, 'ave_value': -19.963252159849496, 'soft_opc': nan} step=16512




2022-04-20 18:23.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.38 [info     ] FQE_20220420182214: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016417614249295966, 'time_algorithm_update': 0.003509668416755144, 'loss': 0.47565236549044765, 'time_step': 0.003749491863472517, 'init_value': -15.860308647155762, 'ave_value': -19.954889634708028, 'soft_opc': nan} step=16856




2022-04-20 18:23.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:23.40 [info     ] FQE_20220420182214: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017457022223361704, 'time_algorithm_update': 0.0037261587242747463, 'loss': 0.4730311022627406, 'time_step': 0.003977391608925753, 'init_value': -15.674680709838867, 'ave_value': -19.840828413175636, 'soft_opc': nan} step=17200




2022-04-20 18:23.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420182214/model_17200.pt
search iteration:  19
using hyper params:  [0.006010025683687231, 0.008541418156318844, 1.9729846047081013e-05, 5]
2022-04-20 18:23.40 [debug    ] RoundIterator is selected.
2022-04-20 18:23.40 [info     ] Directory is created at d3rlpy_logs/CQL_20220420182340
2022-04-20 18:23.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:23.40 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:23.40 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420182340/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.006010025683687231, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.49 [info     ] CQL_20220420182340: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00037800986864413433, 'time_algorithm_update': 0.024873667990255078, 'temp_loss': 4.619149827817727, 'temp': 0.9966707090188188, 'alpha_loss': -13.961699544337758, 'alpha': 1.0157686071089136, 'critic_loss': 32.64048189308211, 'actor_loss': 4.243055491942411, 'time_step': 0.025353773295530797, 'td_error': 10.023813947696558, 'init_value': -12.916646957397461, 'ave_value': -7.834329341148605} step=342
2022-04-20 18:23.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:23.58 [info     ] CQL_20220420182340: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036985553496065194, 'time_algorithm_update': 0.024911307452017802, 'temp_loss': 3.7735377257330374, 'temp': 0.9903456185296265, 'alpha_loss': -4.73764740357622, 'alpha': 1.0362411716528106, 'critic_loss': 24.961093411808125, 'actor_loss': 10.797899428863971, 'time_step': 0.025384651987176193, 'td_error': 6.283990469486015, 'init_value': -18.461490631103516, 'ave_value': -11.164031233004554} step=684
2022-04-20 18:23.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.07 [info     ] CQL_20220420182340: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00036930480198553434, 'time_algorithm_update': 0.025345303858929906, 'temp_loss': 2.9106887565021626, 'temp': 0.9850314874746646, 'alpha_loss': 0.06265412779216524, 'alpha': 1.043775403011612, 'critic_loss': 49.40661657902233, 'actor_loss': 17.320848835839165, 'time_step': 0.02581501007080078, 'td_error': 10.534716364021836, 'init_value': -28.776172637939453, 'ave_value': -17.15584077576729} step=1026
2022-04-20 18:24.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.16 [info     ] CQL_20220420182340: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003693675437168768, 'time_algorithm_update': 0.025072263695343197, 'temp_loss': 2.378692421299672, 'temp': 0.9802329974216327, 'alpha_loss': 3.926048566636286, 'alpha': 1.0337283213933308, 'critic_loss': 84.15351477282786, 'actor_loss': 23.95532572896857, 'time_step': 0.025544201421458818, 'td_error': 16.34566234039205, 'init_value': -39.06736755371094, 'ave_value': -22.71799293027335} step=1368
2022-04-20 18:24.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.25 [info     ] CQL_20220420182340: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003667205397845709, 'time_algorithm_update': 0.024922170137104235, 'temp_loss': 1.9402610826910587, 'temp': 0.9757881983678941, 'alpha_loss': 6.932130169101626, 'alpha': 1.00414835413297, 'critic_loss': 125.83803683275367, 'actor_loss': 30.624350558944613, 'time_step': 0.02538993972086767, 'td_error': 24.68638897060464, 'init_value': -50.38481140136719, 'ave_value': -28.563708194327113} step=1710
2022-04-20 18:24.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.34 [info     ] CQL_20220420182340: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00036801511084127146, 'time_algorithm_update': 0.024948257451866105, 'temp_loss': 1.582744245117868, 'temp': 0.9716935602196476, 'alpha_loss': 9.172720076745016, 'alpha': 0.9644182814143555, 'critic_loss': 176.03723711158798, 'actor_loss': 37.15592948735109, 'time_step': 0.025421046374136943, 'td_error': 33.46942480648231, 'init_value': -60.73956298828125, 'ave_value': -34.04654325671561} step=2052
2022-04-20 18:24.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.43 [info     ] CQL_20220420182340: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003675584904631676, 'time_algorithm_update': 0.024994974247893396, 'temp_loss': 1.2535642483081038, 'temp': 0.9679747493992075, 'alpha_loss': 11.125424531468173, 'alpha': 0.922106304189615, 'critic_loss': 235.40140546832168, 'actor_loss': 43.833107206556534, 'time_step': 0.025464517331262777, 'td_error': 47.40151076387485, 'init_value': -70.48435974121094, 'ave_value': -39.630899841100785} step=2394
2022-04-20 18:24.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:24.53 [info     ] CQL_20220420182340: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037103229098849825, 'time_algorithm_update': 0.024948629022341722, 'temp_loss': 0.9469738344264309, 'temp': 0.9646490280739746, 'alpha_loss': 12.465006927300616, 'alpha': 0.8814684410541378, 'critic_loss': 303.3475012528269, 'actor_loss': 50.42390514396087, 'time_step': 0.025423789582057307, 'td_error': 60.58899659085121, 'init_value': -81.46138763427734, 'ave_value': -46.047603644924656} step=2736
2022-04-20 18:24.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.02 [info     ] CQL_20220420182340: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00037056103087308116, 'time_algorithm_update': 0.025107097904584562, 'temp_loss': 0.6610606426362231, 'temp': 0.9618944020299186, 'alpha_loss': 13.729343018336603, 'alpha': 0.8437524498903264, 'critic_loss': 376.28211992944193, 'actor_loss': 57.013427098592125, 'time_step': 0.025581737010799653, 'td_error': 101.61555993727815, 'init_value': -91.60020446777344, 'ave_value': -51.03185597419806} step=3078
2022-04-20 18:25.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.11 [info     ] CQL_20220420182340: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003722759715297766, 'time_algorithm_update': 0.025150939735055666, 'temp_loss': 0.4970702650624583, 'temp': 0.9596791903526463, 'alpha_loss': 14.783610817981742, 'alpha': 0.8088749368288364, 'critic_loss': 458.5481176543654, 'actor_loss': 63.81238115321823, 'time_step': 0.025625641583002103, 'td_error': 105.9487535221353, 'init_value': -103.8252182006836, 'ave_value': -56.79056307405435} step=3420
2022-04-20 18:25.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.20 [info     ] CQL_20220420182340: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00037118496253476504, 'time_algorithm_update': 0.024978023523475692, 'temp_loss': 0.3478054184226962, 'temp': 0.9576832564950687, 'alpha_loss': 15.33153810696295, 'alpha': 0.776134150767187, 'critic_loss': 549.1019527145296, 'actor_loss': 70.58625452142013, 'time_step': 0.02545443334077534, 'td_error': 172.98312762350162, 'init_value': -112.4857177734375, 'ave_value': -63.01773648306392} step=3762
2022-04-20 18:25.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.29 [info     ] CQL_20220420182340: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003734527275576229, 'time_algorithm_update': 0.025081371006212737, 'temp_loss': 0.18561518949331246, 'temp': 0.9562056409685236, 'alpha_loss': 15.695708436575549, 'alpha': 0.7466774560206118, 'critic_loss': 635.3906733641151, 'actor_loss': 76.74396386620595, 'time_step': 0.025555366661116394, 'td_error': 230.3531807778398, 'init_value': -127.29703521728516, 'ave_value': -69.92334328944216} step=4104
2022-04-20 18:25.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.38 [info     ] CQL_20220420182340: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00036564347339652437, 'time_algorithm_update': 0.025037197341695863, 'temp_loss': 0.09404988833076773, 'temp': 0.9554238699333012, 'alpha_loss': 17.121585741377714, 'alpha': 0.7179704857499976, 'critic_loss': 726.5760580140944, 'actor_loss': 83.28337913647033, 'time_step': 0.02550093054074293, 'td_error': 339.93544846731766, 'init_value': -137.8859100341797, 'ave_value': -75.38364813507208} step=4446
2022-04-20 18:25.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.47 [info     ] CQL_20220420182340: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003347194682784945, 'time_algorithm_update': 0.022621213344105502, 'temp_loss': -0.08786631036789445, 'temp': 0.9552578957457292, 'alpha_loss': 20.912935206764622, 'alpha': 0.6899327255829036, 'critic_loss': 826.7703741419385, 'actor_loss': 90.47707703796743, 'time_step': 0.02304587447852419, 'td_error': 604.7755943297166, 'init_value': -153.76902770996094, 'ave_value': -81.89853055663944} step=4788
2022-04-20 18:25.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:25.55 [info     ] CQL_20220420182340: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003539651458026373, 'time_algorithm_update': 0.024347024354321217, 'temp_loss': -0.18631611858652522, 'temp': 0.9564433474289743, 'alpha_loss': 22.59624892229225, 'alpha': 0.6605437675066161, 'critic_loss': 938.9624367875663, 'actor_loss': 97.1238782559222, 'time_step': 0.024800695173921642, 'td_error': 1024.006217217972, 'init_value': -171.3318634033203, 'ave_value': -89.48182288535342} step=5130
2022-04-20 18:25.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.04 [info     ] CQL_20220420182340: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035926473071003516, 'time_algorithm_update': 0.02424027208696332, 'temp_loss': -0.19223954298735013, 'temp': 0.9581505502873694, 'alpha_loss': 21.246858937001367, 'alpha': 0.6359607101532451, 'critic_loss': 1045.6931928668105, 'actor_loss': 103.44035850212587, 'time_step': 0.024697394398917927, 'td_error': 1509.0713744350976, 'init_value': -180.9685821533203, 'ave_value': -94.72698732029411} step=5472
2022-04-20 18:26.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.13 [info     ] CQL_20220420182340: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003662548567119398, 'time_algorithm_update': 0.025071646734984996, 'temp_loss': -0.3461338630414497, 'temp': 0.9611836541117283, 'alpha_loss': 27.77605548657869, 'alpha': 0.6094444537023355, 'critic_loss': 1162.3800668102956, 'actor_loss': 110.5639645314356, 'time_step': 0.02553682229672259, 'td_error': 1799.6931486463993, 'init_value': -200.94491577148438, 'ave_value': -104.04001906164729} step=5814
2022-04-20 18:26.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.23 [info     ] CQL_20220420182340: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00036990921399746723, 'time_algorithm_update': 0.025545792970043873, 'temp_loss': -0.28846169416711, 'temp': 0.9642386865197566, 'alpha_loss': 21.60117839093794, 'alpha': 0.5885297518376021, 'critic_loss': 1267.3976508246528, 'actor_loss': 115.72906175133778, 'time_step': 0.026016237442953546, 'td_error': 1492.338337801802, 'init_value': -207.2939453125, 'ave_value': -107.4279832005494} step=6156
2022-04-20 18:26.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.32 [info     ] CQL_20220420182340: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003590158551757099, 'time_algorithm_update': 0.02539039773550647, 'temp_loss': -0.5335547204418053, 'temp': 0.9690732199546189, 'alpha_loss': 35.491441534276596, 'alpha': 0.5647847912116357, 'critic_loss': 1380.910744114926, 'actor_loss': 123.50837952909414, 'time_step': 0.025848692620706836, 'td_error': 5005.6758844705955, 'init_value': -227.6480712890625, 'ave_value': -115.87158094384783} step=6498
2022-04-20 18:26.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.41 [info     ] CQL_20220420182340: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003608806788572791, 'time_algorithm_update': 0.025372917889154446, 'temp_loss': -0.607343348048148, 'temp': 0.9755262413568664, 'alpha_loss': 40.24424555915141, 'alpha': 0.5393153083951849, 'critic_loss': 1536.4864455552129, 'actor_loss': 132.4512347394263, 'time_step': 0.02583350493893986, 'td_error': 868.9483882149319, 'init_value': -237.55502319335938, 'ave_value': -118.52588500213314} step=6840
2022-04-20 18:26.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:26.50 [info     ] CQL_20220420182340: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003547041039717825, 'time_algorithm_update': 0.02515696642691629, 'temp_loss': -0.30979967005659786, 'temp': 0.9803917969179432, 'alpha_loss': 21.96578906432927, 'alpha': 0.5228117577165191, 'critic_loss': 1665.1526624891494, 'actor_loss': 135.2234136347185, 'time_step': 0.025607042842441134, 'td_error': 741.9789029607263, 'init_value': -251.05517578125, 'ave_value': -125.20592944860257} step=7182
2022-04-20 18:26.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.00 [info     ] CQL_20220420182340: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00035836334116974767, 'time_algorithm_update': 0.02565665830645645, 'temp_loss': -0.31068025726541787, 'temp': 0.9836910734748283, 'alpha_loss': 30.453740043249745, 'alpha': 0.5081812777714423, 'critic_loss': 1742.0420307360198, 'actor_loss': 139.8006518402992, 'time_step': 0.026115348464564273, 'td_error': 5603.298715180583, 'init_value': -256.2482604980469, 'ave_value': -129.54805988886068} step=7524
2022-04-20 18:27.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.09 [info     ] CQL_20220420182340: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00036245061640153854, 'time_algorithm_update': 0.025420942501715053, 'temp_loss': -0.3882671084353014, 'temp': 0.9875693748220365, 'alpha_loss': 38.796318689982094, 'alpha': 0.48926492340383476, 'critic_loss': 1810.5082236842106, 'actor_loss': 143.84646102280644, 'time_step': 0.025879556672614917, 'td_error': 8472.543685146436, 'init_value': -267.7558288574219, 'ave_value': -132.58443132623964} step=7866
2022-04-20 18:27.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.18 [info     ] CQL_20220420182340: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00036236905215079326, 'time_algorithm_update': 0.02569842687127186, 'temp_loss': -0.45798206587501783, 'temp': 0.992499227760828, 'alpha_loss': 41.12172571539182, 'alpha': 0.47077947070724085, 'critic_loss': 1902.8094164753518, 'actor_loss': 149.33867683187563, 'time_step': 0.02615926558511299, 'td_error': 6319.238412470234, 'init_value': -275.8641052246094, 'ave_value': -134.70124601649943} step=8208
2022-04-20 18:27.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.28 [info     ] CQL_20220420182340: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003650509126005117, 'time_algorithm_update': 0.02567427618461743, 'temp_loss': -0.36349080909827824, 'temp': 0.9969556472803417, 'alpha_loss': 31.401234134595995, 'alpha': 0.455002024769783, 'critic_loss': 1963.9440878706368, 'actor_loss': 150.35183544047393, 'time_step': 0.026139427346792834, 'td_error': 10355.063920197343, 'init_value': -279.6611022949219, 'ave_value': -139.50451348866025} step=8550
2022-04-20 18:27.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.37 [info     ] CQL_20220420182340: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003616384595458271, 'time_algorithm_update': 0.025525386570489896, 'temp_loss': -0.3335052706214071, 'temp': 1.000840019935753, 'alpha_loss': 36.31872041323032, 'alpha': 0.44080768987449287, 'critic_loss': 1999.1677877861157, 'actor_loss': 152.64677574202332, 'time_step': 0.025985378270957902, 'td_error': 12148.573182999644, 'init_value': -291.07135009765625, 'ave_value': -140.44448323349627} step=8892
2022-04-20 18:27.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.46 [info     ] CQL_20220420182340: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003578927781846788, 'time_algorithm_update': 0.02583560748407018, 'temp_loss': -0.27776858663698384, 'temp': 1.0046426939685442, 'alpha_loss': 35.47201879122104, 'alpha': 0.4263472532668309, 'critic_loss': 2027.7969299673337, 'actor_loss': 153.5549414562203, 'time_step': 0.026293900975009853, 'td_error': 11602.136253864814, 'init_value': -298.3427734375, 'ave_value': -142.5679990136395} step=9234
2022-04-20 18:27.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:27.56 [info     ] CQL_20220420182340: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035772964968318826, 'time_algorithm_update': 0.02594112513358133, 'temp_loss': -0.1967618498428349, 'temp': 1.0077077164287456, 'alpha_loss': 32.236806309014035, 'alpha': 0.41307733569577426, 'critic_loss': 2049.929070723684, 'actor_loss': 154.2715013069019, 'time_step': 0.02639721081270809, 'td_error': 12776.869877353101, 'init_value': -304.0782775878906, 'ave_value': -148.01344122157576} step=9576
2022-04-20 18:27.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.05 [info     ] CQL_20220420182340: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003574410377190127, 'time_algorithm_update': 0.026255951987372503, 'temp_loss': -0.1457954015615478, 'temp': 1.0099850788451077, 'alpha_loss': 34.63107093035826, 'alpha': 0.4004147985176733, 'critic_loss': 2074.2380688762105, 'actor_loss': 155.58628088967842, 'time_step': 0.02671629573866638, 'td_error': 15953.36294686174, 'init_value': -303.2984924316406, 'ave_value': -144.92274673216113} step=9918
2022-04-20 18:28.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.15 [info     ] CQL_20220420182340: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003591371558563054, 'time_algorithm_update': 0.025938413296526637, 'temp_loss': -0.08590369711886024, 'temp': 1.0116334233367652, 'alpha_loss': 35.18843702963221, 'alpha': 0.3876808365883186, 'critic_loss': 2081.53335803294, 'actor_loss': 156.0889550816943, 'time_step': 0.026396480917233473, 'td_error': 21906.951579325756, 'init_value': -310.13372802734375, 'ave_value': -150.11681084527356} step=10260
2022-04-20 18:28.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.24 [info     ] CQL_20220420182340: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00036429592043335675, 'time_algorithm_update': 0.02612459729289451, 'temp_loss': -0.09650653611585411, 'temp': 1.0128790241235879, 'alpha_loss': 37.95287940289542, 'alpha': 0.37450533443027073, 'critic_loss': 2086.764154445358, 'actor_loss': 156.6421740347879, 'time_step': 0.026585926089370458, 'td_error': 22609.907776232598, 'init_value': -301.581298828125, 'ave_value': -145.49785183474154} step=10602
2022-04-20 18:28.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.34 [info     ] CQL_20220420182340: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003620170013249269, 'time_algorithm_update': 0.025847733369347644, 'temp_loss': -0.07186834422098091, 'temp': 1.0145042054137292, 'alpha_loss': 36.42716214949625, 'alpha': 0.36210669289555464, 'critic_loss': 2086.8720014248674, 'actor_loss': 156.52821359020925, 'time_step': 0.026315134868287203, 'td_error': 24863.455153979376, 'init_value': -304.9729309082031, 'ave_value': -146.47472643186717} step=10944
2022-04-20 18:28.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.43 [info     ] CQL_20220420182340: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003723603243019149, 'time_algorithm_update': 0.02583565140328212, 'temp_loss': -0.05499773826559036, 'temp': 1.0152415688972027, 'alpha_loss': 39.45198437902663, 'alpha': 0.35028148290009525, 'critic_loss': 2085.041356136924, 'actor_loss': 156.80841622157405, 'time_step': 0.02631308879071509, 'td_error': 24612.051508278673, 'init_value': -301.6673889160156, 'ave_value': -148.5362939658996} step=11286
2022-04-20 18:28.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:28.53 [info     ] CQL_20220420182340: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003590402547378986, 'time_algorithm_update': 0.025673871849015442, 'temp_loss': -0.013434717992636545, 'temp': 1.0158600573651275, 'alpha_loss': 42.51721180949295, 'alpha': 0.3378555229184223, 'critic_loss': 2083.322423745317, 'actor_loss': 156.88785932217425, 'time_step': 0.026133543566653605, 'td_error': 25756.96360897497, 'init_value': -298.1907653808594, 'ave_value': -146.45998701202438} step=11628
2022-04-20 18:28.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.02 [info     ] CQL_20220420182340: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00036255448882342777, 'time_algorithm_update': 0.025681586293449177, 'temp_loss': 0.021995925192029503, 'temp': 1.0155878809460424, 'alpha_loss': 46.17403522848386, 'alpha': 0.3261786430028447, 'critic_loss': 2076.999941106428, 'actor_loss': 156.83918851439716, 'time_step': 0.026144662098577846, 'td_error': 52618.38893061567, 'init_value': -300.9566345214844, 'ave_value': -147.83798859683645} step=11970
2022-04-20 18:29.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.11 [info     ] CQL_20220420182340: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036338337680749726, 'time_algorithm_update': 0.025236437892356113, 'temp_loss': 0.06648679548188259, 'temp': 1.014990468819936, 'alpha_loss': 46.960730103721396, 'alpha': 0.3141033190209963, 'critic_loss': 2083.165576243261, 'actor_loss': 157.38483051946986, 'time_step': 0.025699054985715633, 'td_error': 36143.91038480951, 'init_value': -304.204345703125, 'ave_value': -150.4805895985757} step=12312
2022-04-20 18:29.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.20 [info     ] CQL_20220420182340: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00036475463220250536, 'time_algorithm_update': 0.024193509977463393, 'temp_loss': 0.05598734528349157, 'temp': 1.0141951128056175, 'alpha_loss': 42.88606829461995, 'alpha': 0.3032763900986889, 'critic_loss': 2082.0734720508954, 'actor_loss': 157.07444740875422, 'time_step': 0.024657635660896526, 'td_error': 39216.16938204054, 'init_value': -297.41015625, 'ave_value': -145.31432183797764} step=12654
2022-04-20 18:29.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.29 [info     ] CQL_20220420182340: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003692992249427483, 'time_algorithm_update': 0.02467104426601477, 'temp_loss': 0.1044408041460995, 'temp': 1.0127183805432236, 'alpha_loss': 44.9900418387519, 'alpha': 0.29394638529646466, 'critic_loss': 2074.590527272364, 'actor_loss': 156.77389892221194, 'time_step': 0.02514155636056822, 'td_error': 72347.79749473097, 'init_value': -296.83880615234375, 'ave_value': -149.27937238480794} step=12996
2022-04-20 18:29.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.38 [info     ] CQL_20220420182340: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00036505997529503895, 'time_algorithm_update': 0.02451480341236494, 'temp_loss': 0.14443309860009895, 'temp': 1.010129958216907, 'alpha_loss': 37.95518016675759, 'alpha': 0.2846331736671994, 'critic_loss': 2066.511470683137, 'actor_loss': 156.1829920540079, 'time_step': 0.02497974961821796, 'td_error': 27560.097593772287, 'init_value': -291.7850646972656, 'ave_value': -144.01539281887918} step=13338
2022-04-20 18:29.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.47 [info     ] CQL_20220420182340: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00036885515291091295, 'time_algorithm_update': 0.024388942105031154, 'temp_loss': 0.13883897719893887, 'temp': 1.0078690498195895, 'alpha_loss': 46.28397258262188, 'alpha': 0.27596965719733324, 'critic_loss': 2055.116714566772, 'actor_loss': 156.14009709943804, 'time_step': 0.024859652184603506, 'td_error': 41111.77095423166, 'init_value': -298.5977783203125, 'ave_value': -148.83394680497076} step=13680
2022-04-20 18:29.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:29.56 [info     ] CQL_20220420182340: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003620699832313939, 'time_algorithm_update': 0.024303552002934683, 'temp_loss': 0.1347260495419042, 'temp': 1.005014860142044, 'alpha_loss': 43.75028903721369, 'alpha': 0.26738689691699735, 'critic_loss': 2045.3254355268868, 'actor_loss': 155.51348462021141, 'time_step': 0.024769101226538942, 'td_error': 73845.06728999499, 'init_value': -289.63128662109375, 'ave_value': -144.7447960029764} step=14022
2022-04-20 18:29.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.05 [info     ] CQL_20220420182340: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00036972098880343966, 'time_algorithm_update': 0.02457681873388458, 'temp_loss': 0.20379126725489632, 'temp': 1.0019485995783444, 'alpha_loss': 49.50570974573057, 'alpha': 0.25863981351517795, 'critic_loss': 2033.091809367576, 'actor_loss': 155.1205863729555, 'time_step': 0.02504795127444797, 'td_error': 60989.71783684419, 'init_value': -291.12261962890625, 'ave_value': -145.48574063607433} step=14364
2022-04-20 18:30.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.14 [info     ] CQL_20220420182340: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003653674097786173, 'time_algorithm_update': 0.02476458870179472, 'temp_loss': 0.20617053363668292, 'temp': 0.9982786750235753, 'alpha_loss': 49.893965124386796, 'alpha': 0.2495071248352876, 'critic_loss': 2029.0393851653873, 'actor_loss': 154.8945803279765, 'time_step': 0.025231447833323338, 'td_error': 52492.72426829229, 'init_value': -287.0419006347656, 'ave_value': -146.46249823413117} step=14706
2022-04-20 18:30.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.23 [info     ] CQL_20220420182340: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003689687851576777, 'time_algorithm_update': 0.024716391200907746, 'temp_loss': 0.2845660134256757, 'temp': 0.9938252336449094, 'alpha_loss': 49.25211276924401, 'alpha': 0.24112993182494627, 'critic_loss': 2026.8997941937364, 'actor_loss': 154.6232343528703, 'time_step': 0.025187446360002485, 'td_error': 90422.68507483485, 'init_value': -284.209228515625, 'ave_value': -145.95671843228575} step=15048
2022-04-20 18:30.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.31 [info     ] CQL_20220420182340: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00036380862631992986, 'time_algorithm_update': 0.024265071104841624, 'temp_loss': 0.25162424850795007, 'temp': 0.9890393468371609, 'alpha_loss': 44.94288803122894, 'alpha': 0.23358927828352355, 'critic_loss': 2024.2036297000639, 'actor_loss': 154.2903991833068, 'time_step': 0.024730203444497625, 'td_error': 79198.52958033155, 'init_value': -278.63330078125, 'ave_value': -144.7979354384639} step=15390
2022-04-20 18:30.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.41 [info     ] CQL_20220420182340: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00038131496362518843, 'time_algorithm_update': 0.027391917524281998, 'temp_loss': 0.2817031555204538, 'temp': 0.9845551142567083, 'alpha_loss': 42.125737228588754, 'alpha': 0.2267967872539459, 'critic_loss': 2011.851342987596, 'actor_loss': 153.5314950775682, 'time_step': 0.027876873462520844, 'td_error': 61667.73033794903, 'init_value': -279.11651611328125, 'ave_value': -147.63692846923462} step=15732
2022-04-20 18:30.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:30.52 [info     ] CQL_20220420182340: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003846507323415656, 'time_algorithm_update': 0.02789787939417432, 'temp_loss': 0.28780635040013763, 'temp': 0.9797934633597993, 'alpha_loss': 36.26407247398332, 'alpha': 0.22061079252533047, 'critic_loss': 2001.5861819975557, 'actor_loss': 153.04977695844326, 'time_step': 0.0283913270771852, 'td_error': 77773.64995399096, 'init_value': -272.62896728515625, 'ave_value': -144.38483517857284} step=16074
2022-04-20 18:30.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.01 [info     ] CQL_20220420182340: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003820037284092596, 'time_algorithm_update': 0.027197357506779898, 'temp_loss': 0.3153678268542764, 'temp': 0.9747977418857708, 'alpha_loss': 33.142405412350485, 'alpha': 0.2149971730527822, 'critic_loss': 1988.6497642115544, 'actor_loss': 152.3270073606257, 'time_step': 0.027683076105619733, 'td_error': 54942.26881216813, 'init_value': -267.05853271484375, 'ave_value': -142.09464342259977} step=16416
2022-04-20 18:31.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.11 [info     ] CQL_20220420182340: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00037244607133474965, 'time_algorithm_update': 0.02623888553931699, 'temp_loss': 0.31020701565501985, 'temp': 0.9703128113035571, 'alpha_loss': 26.803968493701422, 'alpha': 0.20989732518356446, 'critic_loss': 1972.2972901104486, 'actor_loss': 151.42787420261672, 'time_step': 0.02671430612865247, 'td_error': 36706.830602655435, 'init_value': -266.8565368652344, 'ave_value': -142.87442582326563} step=16758
2022-04-20 18:31.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:31.20 [info     ] CQL_20220420182340: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00036974817688702145, 'time_algorithm_update': 0.026257765223408305, 'temp_loss': 0.3519475086216341, 'temp': 0.9654170518730119, 'alpha_loss': 23.213561798396864, 'alpha': 0.20528504762210345, 'critic_loss': 1953.3391787880346, 'actor_loss': 150.4382663526033, 'time_step': 0.026735767286423354, 'td_error': 38559.17833516795, 'init_value': -262.0943298339844, 'ave_value': -141.39788841110865} step=17100
2022-04-20 18:31.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420182340/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:31.21 [info     ] FQE_20220420183121: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016006073319768332, 'time_algorithm_update': 0.0037206712975559465, 'loss': 0.007789400762440749, 'time_step': 0.0039575774985623645, 'init_value': 0.24682137370109558, 'ave_value': 0.2716648822146896, 'soft_opc': nan} step=166




2022-04-20 18:31.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.22 [info     ] FQE_20220420183121: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015514442719608904, 'time_algorithm_update': 0.0035916767924664967, 'loss': 0.005964392229510718, 'time_step': 0.0038168487778629163, 'init_value': 0.10667720437049866, 'ave_value': 0.1844873100137422, 'soft_opc': nan} step=332




2022-04-20 18:31.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.23 [info     ] FQE_20220420183121: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001506202192191618, 'time_algorithm_update': 0.0035870348114565194, 'loss': 0.0053690209359222325, 'time_step': 0.0038063856492559596, 'init_value': 0.05763986334204674, 'ave_value': 0.16135354675058913, 'soft_opc': nan} step=498




2022-04-20 18:31.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.24 [info     ] FQE_20220420183121: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015342091939535486, 'time_algorithm_update': 0.003675480923020696, 'loss': 0.005536086004541581, 'time_step': 0.00390042885240302, 'init_value': -0.06633627414703369, 'ave_value': 0.08285877702134736, 'soft_opc': nan} step=664




2022-04-20 18:31.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.24 [info     ] FQE_20220420183121: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001535257661198995, 'time_algorithm_update': 0.003704141421490405, 'loss': 0.00524803521728749, 'time_step': 0.003931406032608216, 'init_value': -0.13532887399196625, 'ave_value': 0.059769947487265204, 'soft_opc': nan} step=830




2022-04-20 18:31.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.25 [info     ] FQE_20220420183121: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015456274331334126, 'time_algorithm_update': 0.003722644713987787, 'loss': 0.004999193814527019, 'time_step': 0.0039482978453119115, 'init_value': -0.17543287575244904, 'ave_value': 0.035279525198705225, 'soft_opc': nan} step=996




2022-04-20 18:31.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.26 [info     ] FQE_20220420183121: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015586542795939618, 'time_algorithm_update': 0.00362213117530547, 'loss': 0.004867172720056611, 'time_step': 0.003852706357657191, 'init_value': -0.22758755087852478, 'ave_value': 0.008283876848505676, 'soft_opc': nan} step=1162




2022-04-20 18:31.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.26 [info     ] FQE_20220420183121: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015294264598065112, 'time_algorithm_update': 0.003675962068948401, 'loss': 0.0045739615796111435, 'time_step': 0.0038962177483432264, 'init_value': -0.3097112774848938, 'ave_value': -0.03590546182104017, 'soft_opc': nan} step=1328




2022-04-20 18:31.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.27 [info     ] FQE_20220420183121: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015106833124735268, 'time_algorithm_update': 0.0033883646310093893, 'loss': 0.004171075613563319, 'time_step': 0.0036082497562270567, 'init_value': -0.3182826340198517, 'ave_value': -0.02611651390723817, 'soft_opc': nan} step=1494




2022-04-20 18:31.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.28 [info     ] FQE_20220420183121: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015168304902961455, 'time_algorithm_update': 0.003594661333474768, 'loss': 0.004118634321928832, 'time_step': 0.003817258110965591, 'init_value': -0.4187873601913452, 'ave_value': -0.09293948295106692, 'soft_opc': nan} step=1660




2022-04-20 18:31.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.28 [info     ] FQE_20220420183121: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016004349811967597, 'time_algorithm_update': 0.0035173533910728364, 'loss': 0.003848794919960425, 'time_step': 0.003751889768853245, 'init_value': -0.5045511722564697, 'ave_value': -0.13613222151085408, 'soft_opc': nan} step=1826




2022-04-20 18:31.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.29 [info     ] FQE_20220420183121: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015391786414456656, 'time_algorithm_update': 0.0037171682679509543, 'loss': 0.003721491628640657, 'time_step': 0.003938663436705808, 'init_value': -0.5617054104804993, 'ave_value': -0.1724162926993895, 'soft_opc': nan} step=1992




2022-04-20 18:31.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.30 [info     ] FQE_20220420183121: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001604585762483528, 'time_algorithm_update': 0.003683883023549275, 'loss': 0.003864179981075766, 'time_step': 0.003921842000570642, 'init_value': -0.632219672203064, 'ave_value': -0.20622761505655945, 'soft_opc': nan} step=2158




2022-04-20 18:31.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.31 [info     ] FQE_20220420183121: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015507405062755906, 'time_algorithm_update': 0.0036545546658067816, 'loss': 0.00402225044286664, 'time_step': 0.0038829611008425794, 'init_value': -0.726347804069519, 'ave_value': -0.262579610996758, 'soft_opc': nan} step=2324




2022-04-20 18:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.31 [info     ] FQE_20220420183121: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015578930636486374, 'time_algorithm_update': 0.003650882157934717, 'loss': 0.003913240262492369, 'time_step': 0.003875495439552399, 'init_value': -0.7816512584686279, 'ave_value': -0.2778975265136257, 'soft_opc': nan} step=2490




2022-04-20 18:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.32 [info     ] FQE_20220420183121: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015789916716426252, 'time_algorithm_update': 0.003633049597223121, 'loss': 0.004199259033628616, 'time_step': 0.003860032702066812, 'init_value': -0.843969464302063, 'ave_value': -0.30631755296568813, 'soft_opc': nan} step=2656




2022-04-20 18:31.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.33 [info     ] FQE_20220420183121: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015739216861954653, 'time_algorithm_update': 0.003554058362202472, 'loss': 0.004312680966181524, 'time_step': 0.0037848733993897953, 'init_value': -0.9641448259353638, 'ave_value': -0.40308679551046617, 'soft_opc': nan} step=2822




2022-04-20 18:31.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.33 [info     ] FQE_20220420183121: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001525146415434688, 'time_algorithm_update': 0.0036012049180915557, 'loss': 0.004681106007388647, 'time_step': 0.003825098635202431, 'init_value': -0.9972407221794128, 'ave_value': -0.40800283430563705, 'soft_opc': nan} step=2988




2022-04-20 18:31.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.34 [info     ] FQE_20220420183121: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015613257166851, 'time_algorithm_update': 0.0032526771706270882, 'loss': 0.005239366712216961, 'time_step': 0.0034803712224385826, 'init_value': -1.0461959838867188, 'ave_value': -0.43496254654239386, 'soft_opc': nan} step=3154




2022-04-20 18:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.35 [info     ] FQE_20220420183121: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015132111239146037, 'time_algorithm_update': 0.0035963029746549674, 'loss': 0.005537467064049813, 'time_step': 0.0038146340703389733, 'init_value': -1.115159034729004, 'ave_value': -0.4471724103332858, 'soft_opc': nan} step=3320




2022-04-20 18:31.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.36 [info     ] FQE_20220420183121: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015604065125247082, 'time_algorithm_update': 0.004322446972490793, 'loss': 0.0059030278836915565, 'time_step': 0.004544707665960473, 'init_value': -1.1798598766326904, 'ave_value': -0.49297172109248233, 'soft_opc': nan} step=3486




2022-04-20 18:31.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.36 [info     ] FQE_20220420183121: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015986252980059888, 'time_algorithm_update': 0.004506543458226216, 'loss': 0.006633265297411077, 'time_step': 0.004742664026926799, 'init_value': -1.2733403444290161, 'ave_value': -0.5323071988319416, 'soft_opc': nan} step=3652




2022-04-20 18:31.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.37 [info     ] FQE_20220420183121: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015658642872270332, 'time_algorithm_update': 0.003713265959038792, 'loss': 0.006814698270712811, 'time_step': 0.0039407259010406865, 'init_value': -1.3272236585617065, 'ave_value': -0.5604749955092704, 'soft_opc': nan} step=3818




2022-04-20 18:31.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.38 [info     ] FQE_20220420183121: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015751281416559793, 'time_algorithm_update': 0.004354186804897814, 'loss': 0.007233484673414783, 'time_step': 0.0045847763498145415, 'init_value': -1.3635334968566895, 'ave_value': -0.5822323393961835, 'soft_opc': nan} step=3984




2022-04-20 18:31.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.39 [info     ] FQE_20220420183121: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001667163458215185, 'time_algorithm_update': 0.004466244973332049, 'loss': 0.008235995151054854, 'time_step': 0.004708363349179187, 'init_value': -1.490580439567566, 'ave_value': -0.6380107017613104, 'soft_opc': nan} step=4150




2022-04-20 18:31.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.40 [info     ] FQE_20220420183121: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001566323889307229, 'time_algorithm_update': 0.0044513768460377155, 'loss': 0.008603156637540648, 'time_step': 0.0046796841793749705, 'init_value': -1.5543818473815918, 'ave_value': -0.6898276304647323, 'soft_opc': nan} step=4316




2022-04-20 18:31.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.40 [info     ] FQE_20220420183121: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015869485326560148, 'time_algorithm_update': 0.003682784287326307, 'loss': 0.009507859046136153, 'time_step': 0.0039185658994927465, 'init_value': -1.579222321510315, 'ave_value': -0.6872481219821844, 'soft_opc': nan} step=4482




2022-04-20 18:31.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.41 [info     ] FQE_20220420183121: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001556241368672934, 'time_algorithm_update': 0.004438979079924434, 'loss': 0.010114143491745085, 'time_step': 0.004667404186294739, 'init_value': -1.6842994689941406, 'ave_value': -0.7781795301924036, 'soft_opc': nan} step=4648




2022-04-20 18:31.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.42 [info     ] FQE_20220420183121: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001570948635239199, 'time_algorithm_update': 0.004471727164394884, 'loss': 0.010477637334204986, 'time_step': 0.0046966880200857136, 'init_value': -1.6853914260864258, 'ave_value': -0.7472253662291788, 'soft_opc': nan} step=4814




2022-04-20 18:31.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.43 [info     ] FQE_20220420183121: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015958964106548265, 'time_algorithm_update': 0.00432741641998291, 'loss': 0.010857449613051512, 'time_step': 0.0045567894556436195, 'init_value': -1.7258317470550537, 'ave_value': -0.7918164718025658, 'soft_opc': nan} step=4980




2022-04-20 18:31.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.44 [info     ] FQE_20220420183121: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015738642359354408, 'time_algorithm_update': 0.0041435554803135885, 'loss': 0.011891521833253553, 'time_step': 0.0043695059167333395, 'init_value': -1.773377537727356, 'ave_value': -0.8090452805024085, 'soft_opc': nan} step=5146




2022-04-20 18:31.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.45 [info     ] FQE_20220420183121: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.000158733632191118, 'time_algorithm_update': 0.004505918686648449, 'loss': 0.01248019232193053, 'time_step': 0.004738618092364575, 'init_value': -1.820643424987793, 'ave_value': -0.8327350608152359, 'soft_opc': nan} step=5312




2022-04-20 18:31.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.45 [info     ] FQE_20220420183121: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016222804425710654, 'time_algorithm_update': 0.004473733614726239, 'loss': 0.013590489704223597, 'time_step': 0.004706904112574566, 'init_value': -1.8427939414978027, 'ave_value': -0.8313108569323631, 'soft_opc': nan} step=5478




2022-04-20 18:31.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.46 [info     ] FQE_20220420183121: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001592794096613505, 'time_algorithm_update': 0.0037446797612201736, 'loss': 0.014043452434875744, 'time_step': 0.003974907369498747, 'init_value': -1.965811014175415, 'ave_value': -0.929819663596415, 'soft_opc': nan} step=5644




2022-04-20 18:31.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.47 [info     ] FQE_20220420183121: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015815625707787205, 'time_algorithm_update': 0.004470885518085526, 'loss': 0.014310618833055249, 'time_step': 0.004706572337322925, 'init_value': -1.962489366531372, 'ave_value': -0.9183011612917301, 'soft_opc': nan} step=5810




2022-04-20 18:31.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.48 [info     ] FQE_20220420183121: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015895194317921098, 'time_algorithm_update': 0.004394399114401944, 'loss': 0.015563644994757172, 'time_step': 0.0046287344162722665, 'init_value': -2.0563268661499023, 'ave_value': -0.9879423361660196, 'soft_opc': nan} step=5976




2022-04-20 18:31.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.49 [info     ] FQE_20220420183121: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016295479004641613, 'time_algorithm_update': 0.004652126725897731, 'loss': 0.01634754983326757, 'time_step': 0.004885134926761489, 'init_value': -2.2072396278381348, 'ave_value': -1.1206530967114812, 'soft_opc': nan} step=6142




2022-04-20 18:31.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.49 [info     ] FQE_20220420183121: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001550783593970609, 'time_algorithm_update': 0.0036819541310689537, 'loss': 0.01643707650483597, 'time_step': 0.003910492701702808, 'init_value': -2.2791318893432617, 'ave_value': -1.1935015655114307, 'soft_opc': nan} step=6308




2022-04-20 18:31.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.50 [info     ] FQE_20220420183121: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001604973551738693, 'time_algorithm_update': 0.004463157021855733, 'loss': 0.01724059696811397, 'time_step': 0.004699760172740522, 'init_value': -2.3736579418182373, 'ave_value': -1.2671533902171006, 'soft_opc': nan} step=6474




2022-04-20 18:31.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.51 [info     ] FQE_20220420183121: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015853686505053416, 'time_algorithm_update': 0.004559660532388343, 'loss': 0.018344712069157106, 'time_step': 0.004792059760495841, 'init_value': -2.538137674331665, 'ave_value': -1.3834745402506612, 'soft_opc': nan} step=6640




2022-04-20 18:31.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.52 [info     ] FQE_20220420183121: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001618560538234481, 'time_algorithm_update': 0.003949920815157603, 'loss': 0.019296739600368786, 'time_step': 0.004183843911412251, 'init_value': -2.569371223449707, 'ave_value': -1.413410199838187, 'soft_opc': nan} step=6806




2022-04-20 18:31.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.53 [info     ] FQE_20220420183121: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016055193292089254, 'time_algorithm_update': 0.004366754049278167, 'loss': 0.01987200248593756, 'time_step': 0.004601752901651773, 'init_value': -2.63089656829834, 'ave_value': -1.4674836345529814, 'soft_opc': nan} step=6972




2022-04-20 18:31.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.54 [info     ] FQE_20220420183121: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001633038003760648, 'time_algorithm_update': 0.004499777253851833, 'loss': 0.021132447698619217, 'time_step': 0.0047356981828988315, 'init_value': -2.602984666824341, 'ave_value': -1.4608636987349437, 'soft_opc': nan} step=7138




2022-04-20 18:31.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.55 [info     ] FQE_20220420183121: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015986971108310194, 'time_algorithm_update': 0.004495112292737846, 'loss': 0.022231597374109215, 'time_step': 0.004726941327014601, 'init_value': -2.832400321960449, 'ave_value': -1.6869632365538105, 'soft_opc': nan} step=7304




2022-04-20 18:31.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.55 [info     ] FQE_20220420183121: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016170524689088385, 'time_algorithm_update': 0.0037328694240156427, 'loss': 0.022486258809404247, 'time_step': 0.003967391439231045, 'init_value': -2.7982101440429688, 'ave_value': -1.606523705920365, 'soft_opc': nan} step=7470




2022-04-20 18:31.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.56 [info     ] FQE_20220420183121: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015554514275975973, 'time_algorithm_update': 0.004373228693582925, 'loss': 0.023908609652005315, 'time_step': 0.0046005363923957545, 'init_value': -2.859074115753174, 'ave_value': -1.6694209625726348, 'soft_opc': nan} step=7636




2022-04-20 18:31.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.57 [info     ] FQE_20220420183121: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015717242137495293, 'time_algorithm_update': 0.004455319370131895, 'loss': 0.02425717915049249, 'time_step': 0.004685597247387989, 'init_value': -2.950239658355713, 'ave_value': -1.7741974800331404, 'soft_opc': nan} step=7802




2022-04-20 18:31.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.58 [info     ] FQE_20220420183121: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.000165489782769996, 'time_algorithm_update': 0.004331716571945742, 'loss': 0.025087702233927797, 'time_step': 0.004568562450179134, 'init_value': -3.039700984954834, 'ave_value': -1.8591909864013867, 'soft_opc': nan} step=7968




2022-04-20 18:31.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.59 [info     ] FQE_20220420183121: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001596442188125059, 'time_algorithm_update': 0.003882909395608557, 'loss': 0.026342801932043518, 'time_step': 0.004113259085689683, 'init_value': -3.054621696472168, 'ave_value': -1.8640159289897964, 'soft_opc': nan} step=8134




2022-04-20 18:31.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:31.59 [info     ] FQE_20220420183121: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016252391309623258, 'time_algorithm_update': 0.0044985894697258276, 'loss': 0.026599273504268557, 'time_step': 0.0047337994518050226, 'init_value': -3.0786094665527344, 'ave_value': -1.9098981243750681, 'soft_opc': nan} step=8300




2022-04-20 18:31.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183121/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 18:32.00 [debug    ] RoundIterator is selected.
2022-04-20 18:32.00 [info     ] Directory is created at d3rlpy_logs/FQE_20220420183200
2022-04-20 18:32.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:32.00 [debug    ] Building models...
2022-04-20 18:32.00 [debug    ] Models have been built.
2022-04-20 18:32.00 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420183200/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:32.02 [info     ] FQE_20220420183200: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016650003056193507, 'time_algorithm_update': 0.004110088182050128, 'loss': 0.024838893899022666, 'time_step': 0.004352412251539008, 'init_value': -0.9469600915908813, 'ave_value': -0.9689181724714266, 'soft_opc': nan} step=344




2022-04-20 18:32.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.03 [info     ] FQE_20220420183200: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016645220823066178, 'time_algorithm_update': 0.004465355429538461, 'loss': 0.022115674939244812, 'time_step': 0.0047056605649548906, 'init_value': -1.790850281715393, 'ave_value': -1.8089936604773675, 'soft_opc': nan} step=688




2022-04-20 18:32.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.05 [info     ] FQE_20220420183200: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016099838323371354, 'time_algorithm_update': 0.004226876552714858, 'loss': 0.02299106947070551, 'time_step': 0.004462027965590011, 'init_value': -2.8737082481384277, 'ave_value': -2.8968696284670012, 'soft_opc': nan} step=1032




2022-04-20 18:32.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.07 [info     ] FQE_20220420183200: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001645178295845209, 'time_algorithm_update': 0.0040271753488585, 'loss': 0.023661444285819525, 'time_step': 0.004267212956450706, 'init_value': -3.622471332550049, 'ave_value': -3.6511635034470946, 'soft_opc': nan} step=1376




2022-04-20 18:32.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.08 [info     ] FQE_20220420183200: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016461971194245095, 'time_algorithm_update': 0.004442883785380874, 'loss': 0.029074740387650944, 'time_step': 0.004682391188865484, 'init_value': -4.664044380187988, 'ave_value': -4.765116159416534, 'soft_opc': nan} step=1720




2022-04-20 18:32.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.10 [info     ] FQE_20220420183200: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001682909422142561, 'time_algorithm_update': 0.004140684770983319, 'loss': 0.03436630127442524, 'time_step': 0.004383887662444004, 'init_value': -5.17146110534668, 'ave_value': -5.312390222715902, 'soft_opc': nan} step=2064




2022-04-20 18:32.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.12 [info     ] FQE_20220420183200: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016606200573056242, 'time_algorithm_update': 0.004473530968954397, 'loss': 0.0407354045540181, 'time_step': 0.004712936490081077, 'init_value': -6.07387113571167, 'ave_value': -6.285984161845199, 'soft_opc': nan} step=2408




2022-04-20 18:32.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.13 [info     ] FQE_20220420183200: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016867767932803133, 'time_algorithm_update': 0.004114130208658618, 'loss': 0.04763960648533823, 'time_step': 0.004357559043307637, 'init_value': -6.648872375488281, 'ave_value': -6.942384022662232, 'soft_opc': nan} step=2752




2022-04-20 18:32.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.15 [info     ] FQE_20220420183200: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001683207445366438, 'time_algorithm_update': 0.0044669508934021, 'loss': 0.05546320767174367, 'time_step': 0.004710225171821062, 'init_value': -7.119306564331055, 'ave_value': -7.4981386592796255, 'soft_opc': nan} step=3096




2022-04-20 18:32.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.17 [info     ] FQE_20220420183200: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016274147255476132, 'time_algorithm_update': 0.004236340522766113, 'loss': 0.0652546270799195, 'time_step': 0.004472809475521708, 'init_value': -7.785407066345215, 'ave_value': -8.257255464872799, 'soft_opc': nan} step=3440




2022-04-20 18:32.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.18 [info     ] FQE_20220420183200: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016516862913619642, 'time_algorithm_update': 0.004207458606986112, 'loss': 0.07248634391810832, 'time_step': 0.00444659105567045, 'init_value': -8.21773910522461, 'ave_value': -8.85897418139754, 'soft_opc': nan} step=3784




2022-04-20 18:32.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.20 [info     ] FQE_20220420183200: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001654978408369907, 'time_algorithm_update': 0.004473526117413543, 'loss': 0.08522182623964063, 'time_step': 0.004712729953056158, 'init_value': -8.769672393798828, 'ave_value': -9.497297965513694, 'soft_opc': nan} step=4128




2022-04-20 18:32.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.22 [info     ] FQE_20220420183200: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016673844914103663, 'time_algorithm_update': 0.00415185994880144, 'loss': 0.09502656546659595, 'time_step': 0.004392403502796971, 'init_value': -9.300753593444824, 'ave_value': -10.13198495485761, 'soft_opc': nan} step=4472




2022-04-20 18:32.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.24 [info     ] FQE_20220420183200: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016431891640951467, 'time_algorithm_update': 0.004422657018484071, 'loss': 0.11280671310567753, 'time_step': 0.004661622435547585, 'init_value': -10.081093788146973, 'ave_value': -11.09226593603422, 'soft_opc': nan} step=4816




2022-04-20 18:32.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.25 [info     ] FQE_20220420183200: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016649795133014057, 'time_algorithm_update': 0.004075269366419593, 'loss': 0.12920213937705252, 'time_step': 0.0043146942937096885, 'init_value': -10.44839859008789, 'ave_value': -11.668628314271704, 'soft_opc': nan} step=5160




2022-04-20 18:32.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.27 [info     ] FQE_20220420183200: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001668729061304137, 'time_algorithm_update': 0.0044314514758975005, 'loss': 0.14450039378053314, 'time_step': 0.004676423793615297, 'init_value': -11.246889114379883, 'ave_value': -12.642558177043727, 'soft_opc': nan} step=5504




2022-04-20 18:32.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.29 [info     ] FQE_20220420183200: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016216344611589298, 'time_algorithm_update': 0.004146128199821295, 'loss': 0.1628109762271823, 'time_step': 0.0043793126594188604, 'init_value': -11.71629524230957, 'ave_value': -13.222611147218997, 'soft_opc': nan} step=5848




2022-04-20 18:32.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.30 [info     ] FQE_20220420183200: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016432931256848713, 'time_algorithm_update': 0.00416462920432867, 'loss': 0.17912928137365122, 'time_step': 0.004400041907332664, 'init_value': -12.16024398803711, 'ave_value': -13.76904917971508, 'soft_opc': nan} step=6192




2022-04-20 18:32.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.32 [info     ] FQE_20220420183200: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016852243002070936, 'time_algorithm_update': 0.004511310610660287, 'loss': 0.2006592731435545, 'time_step': 0.004753840524096822, 'init_value': -12.62116813659668, 'ave_value': -14.424684497901985, 'soft_opc': nan} step=6536




2022-04-20 18:32.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.34 [info     ] FQE_20220420183200: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016216413919315783, 'time_algorithm_update': 0.004026093455248101, 'loss': 0.2207529320717291, 'time_step': 0.004259995249814765, 'init_value': -13.375852584838867, 'ave_value': -15.38051186671128, 'soft_opc': nan} step=6880




2022-04-20 18:32.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.35 [info     ] FQE_20220420183200: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001657875471336897, 'time_algorithm_update': 0.004478160032006197, 'loss': 0.2404794994535921, 'time_step': 0.004716545343399048, 'init_value': -13.855121612548828, 'ave_value': -15.986813648273278, 'soft_opc': nan} step=7224




2022-04-20 18:32.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.37 [info     ] FQE_20220420183200: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016603220340817473, 'time_algorithm_update': 0.004201142593871715, 'loss': 0.2660481184883433, 'time_step': 0.00444212417269862, 'init_value': -14.396844863891602, 'ave_value': -16.618551172759084, 'soft_opc': nan} step=7568




2022-04-20 18:32.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.39 [info     ] FQE_20220420183200: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016407703244408897, 'time_algorithm_update': 0.004513007263804591, 'loss': 0.2831909321681705, 'time_step': 0.004752843185912731, 'init_value': -14.468095779418945, 'ave_value': -16.90655877681466, 'soft_opc': nan} step=7912




2022-04-20 18:32.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.40 [info     ] FQE_20220420183200: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016693666923877804, 'time_algorithm_update': 0.004210418046906937, 'loss': 0.30954086491404925, 'time_step': 0.004453269548194353, 'init_value': -14.856147766113281, 'ave_value': -17.43030237480327, 'soft_opc': nan} step=8256




2022-04-20 18:32.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.42 [info     ] FQE_20220420183200: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016430020332336426, 'time_algorithm_update': 0.00412405576816825, 'loss': 0.32773651704083867, 'time_step': 0.004362655240435933, 'init_value': -15.15880012512207, 'ave_value': -17.8886311680347, 'soft_opc': nan} step=8600




2022-04-20 18:32.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.44 [info     ] FQE_20220420183200: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001654978408369907, 'time_algorithm_update': 0.0043913950753766435, 'loss': 0.3512668644381297, 'time_step': 0.0046311298082041185, 'init_value': -15.576611518859863, 'ave_value': -18.55037264050664, 'soft_opc': nan} step=8944




2022-04-20 18:32.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.45 [info     ] FQE_20220420183200: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016797628513602324, 'time_algorithm_update': 0.004047908755235894, 'loss': 0.38015060045020005, 'time_step': 0.004292209481084069, 'init_value': -15.814881324768066, 'ave_value': -19.03870254964442, 'soft_opc': nan} step=9288




2022-04-20 18:32.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.47 [info     ] FQE_20220420183200: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016893411791601845, 'time_algorithm_update': 0.004497096289035885, 'loss': 0.4064426558785314, 'time_step': 0.004740025414976963, 'init_value': -16.441314697265625, 'ave_value': -19.80057674155042, 'soft_opc': nan} step=9632




2022-04-20 18:32.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.49 [info     ] FQE_20220420183200: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001626624617465707, 'time_algorithm_update': 0.0040277644645336065, 'loss': 0.4352278740383511, 'time_step': 0.004264608372089474, 'init_value': -16.737281799316406, 'ave_value': -20.271287336730744, 'soft_opc': nan} step=9976




2022-04-20 18:32.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.50 [info     ] FQE_20220420183200: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016603774802629337, 'time_algorithm_update': 0.004458104455193808, 'loss': 0.4595813569125481, 'time_step': 0.004699173361756081, 'init_value': -17.368242263793945, 'ave_value': -21.036846526973957, 'soft_opc': nan} step=10320




2022-04-20 18:32.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.52 [info     ] FQE_20220420183200: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016261394633803258, 'time_algorithm_update': 0.004135513721510421, 'loss': 0.4966774475165145, 'time_step': 0.0043752235035563625, 'init_value': -17.486919403076172, 'ave_value': -21.325523542391288, 'soft_opc': nan} step=10664




2022-04-20 18:32.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.54 [info     ] FQE_20220420183200: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016616804655208143, 'time_algorithm_update': 0.00419863642648209, 'loss': 0.5052123133764538, 'time_step': 0.004440219596374867, 'init_value': -17.605710983276367, 'ave_value': -21.714558075704016, 'soft_opc': nan} step=11008




2022-04-20 18:32.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.55 [info     ] FQE_20220420183200: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016682369764461073, 'time_algorithm_update': 0.004465900188268617, 'loss': 0.5227605644992531, 'time_step': 0.004707109789515651, 'init_value': -17.578388214111328, 'ave_value': -21.898592613516627, 'soft_opc': nan} step=11352




2022-04-20 18:32.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.57 [info     ] FQE_20220420183200: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016399247701777967, 'time_algorithm_update': 0.004069084344908248, 'loss': 0.5379277795260815, 'time_step': 0.004308585510697476, 'init_value': -17.933799743652344, 'ave_value': -22.37472061799453, 'soft_opc': nan} step=11696




2022-04-20 18:32.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:32.59 [info     ] FQE_20220420183200: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001675985580266908, 'time_algorithm_update': 0.004467345947443053, 'loss': 0.5501478135065977, 'time_step': 0.004710593888925952, 'init_value': -18.04131317138672, 'ave_value': -22.658410825801862, 'soft_opc': nan} step=12040




2022-04-20 18:32.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.00 [info     ] FQE_20220420183200: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016710508701413176, 'time_algorithm_update': 0.00410359266192414, 'loss': 0.572402025908665, 'time_step': 0.004345849502918332, 'init_value': -18.09162139892578, 'ave_value': -22.795233545539617, 'soft_opc': nan} step=12384




2022-04-20 18:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.02 [info     ] FQE_20220420183200: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016426832176918207, 'time_algorithm_update': 0.00450093524400578, 'loss': 0.5853859283474036, 'time_step': 0.0047388159951498345, 'init_value': -18.368194580078125, 'ave_value': -23.04323116483855, 'soft_opc': nan} step=12728




2022-04-20 18:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.04 [info     ] FQE_20220420183200: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016738023868826933, 'time_algorithm_update': 0.004244445368301037, 'loss': 0.5932946228259784, 'time_step': 0.004488216583118882, 'init_value': -18.387622833251953, 'ave_value': -23.229783047198712, 'soft_opc': nan} step=13072




2022-04-20 18:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.05 [info     ] FQE_20220420183200: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016384485156037087, 'time_algorithm_update': 0.004090752712515897, 'loss': 0.6096038750405315, 'time_step': 0.004330696754677351, 'init_value': -18.88722801208496, 'ave_value': -23.936534957030602, 'soft_opc': nan} step=13416




2022-04-20 18:33.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.07 [info     ] FQE_20220420183200: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016441317491753158, 'time_algorithm_update': 0.004491959200348965, 'loss': 0.6386538654305908, 'time_step': 0.004732340574264526, 'init_value': -18.544260025024414, 'ave_value': -23.653769438009004, 'soft_opc': nan} step=13760




2022-04-20 18:33.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.09 [info     ] FQE_20220420183200: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001646120880925378, 'time_algorithm_update': 0.004114081000172814, 'loss': 0.6505997665009873, 'time_step': 0.004354494255642558, 'init_value': -19.3913516998291, 'ave_value': -24.462788279864704, 'soft_opc': nan} step=14104




2022-04-20 18:33.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.11 [info     ] FQE_20220420183200: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016527051149412643, 'time_algorithm_update': 0.004534166912699855, 'loss': 0.6615401094259564, 'time_step': 0.004777301189511321, 'init_value': -19.392349243164062, 'ave_value': -24.49462479514737, 'soft_opc': nan} step=14448




2022-04-20 18:33.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.12 [info     ] FQE_20220420183200: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016666082448737565, 'time_algorithm_update': 0.004010985063952069, 'loss': 0.6826065909496487, 'time_step': 0.004251618024914763, 'init_value': -19.88778305053711, 'ave_value': -24.847872786798625, 'soft_opc': nan} step=14792




2022-04-20 18:33.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.14 [info     ] FQE_20220420183200: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016532110613445903, 'time_algorithm_update': 0.004421446212502413, 'loss': 0.7075919531080005, 'time_step': 0.004660760940507401, 'init_value': -20.197986602783203, 'ave_value': -25.131954379207958, 'soft_opc': nan} step=15136




2022-04-20 18:33.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.16 [info     ] FQE_20220420183200: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001631580119909242, 'time_algorithm_update': 0.00426489322684532, 'loss': 0.7187223145250924, 'time_step': 0.0045012270295342735, 'init_value': -20.111583709716797, 'ave_value': -25.04578792075983, 'soft_opc': nan} step=15480




2022-04-20 18:33.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.17 [info     ] FQE_20220420183200: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001626104809517084, 'time_algorithm_update': 0.0040743877721387285, 'loss': 0.7265141748628298, 'time_step': 0.0043099862198496975, 'init_value': -20.694671630859375, 'ave_value': -25.323949291384864, 'soft_opc': nan} step=15824




2022-04-20 18:33.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.19 [info     ] FQE_20220420183200: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016399178394051485, 'time_algorithm_update': 0.0044902888841407244, 'loss': 0.749418509934574, 'time_step': 0.004731587199277656, 'init_value': -21.277196884155273, 'ave_value': -25.89020491433246, 'soft_opc': nan} step=16168




2022-04-20 18:33.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.20 [info     ] FQE_20220420183200: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016583051792410918, 'time_algorithm_update': 0.004125861927520397, 'loss': 0.7464619905763674, 'time_step': 0.004366274489912876, 'init_value': -20.499340057373047, 'ave_value': -25.22260519139435, 'soft_opc': nan} step=16512




2022-04-20 18:33.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.22 [info     ] FQE_20220420183200: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016294177188429723, 'time_algorithm_update': 0.004543576822724453, 'loss': 0.7517748775697032, 'time_step': 0.004781504703122516, 'init_value': -20.679302215576172, 'ave_value': -25.47551766055896, 'soft_opc': nan} step=16856




2022-04-20 18:33.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:33.24 [info     ] FQE_20220420183200: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001634844513826592, 'time_algorithm_update': 0.004124636566916177, 'loss': 0.7319235760978487, 'time_step': 0.004363511190857998, 'init_value': -20.768329620361328, 'ave_value': -25.589413846671782, 'soft_opc': nan} step=17200




2022-04-20 18:33.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420183200/model_17200.pt
search iteration:  20
using hyper params:  [0.0010389651991182848, 0.008919277602270372, 7.784129445676263e-05, 1]
2022-04-20 18:33.24 [debug    ] RoundIterator is selected.
2022-04-20 18:33.24 [info     ] Directory is created at d3rlpy_logs/CQL_20220420183324
2022-04-20 18:33.24 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:33.24 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:33.24 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420183324/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0010389651991182848, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.33 [info     ] CQL_20220420183324: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030894865069473, 'time_algorithm_update': 0.02592199727108604, 'temp_loss': 4.466904896044592, 'temp': 0.9871672548745808, 'alpha_loss': -10.5105333418874, 'alpha': 1.0145952718996862, 'critic_loss': 16.514593774115134, 'actor_loss': -0.39541956667968053, 'time_step': 0.026331115187260144, 'td_error': 8.532180797929692, 'init_value': -2.213340997695923, 'ave_value': -0.24931272838902366} step=342
2022-04-20 18:33.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.43 [info     ] CQL_20220420183324: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003103045692220766, 'time_algorithm_update': 0.02578571595643696, 'temp_loss': 3.0123575002129317, 'temp': 0.9651147170722136, 'alpha_loss': -0.08925991262580209, 'alpha': 1.0273375403114229, 'critic_loss': 30.06892273997703, 'actor_loss': 2.4885466454321876, 'time_step': 0.026197458568372224, 'td_error': 9.778261455960791, 'init_value': -5.2224321365356445, 'ave_value': -1.2909078085637307} step=684
2022-04-20 18:33.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:33.52 [info     ] CQL_20220420183324: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00031361314985487197, 'time_algorithm_update': 0.02568071697190491, 'temp_loss': 2.049139380106452, 'temp': 0.9479177365874687, 'alpha_loss': 5.166001761865894, 'alpha': 1.0139288063983471, 'critic_loss': 64.62741193715591, 'actor_loss': 4.494308823730513, 'time_step': 0.026091973683987444, 'td_error': 12.601265653717475, 'init_value': -7.730493068695068, 'ave_value': -1.8320296841577903} step=1026
2022-04-20 18:33.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.01 [info     ] CQL_20220420183324: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00031110417773151953, 'time_algorithm_update': 0.025844442914103902, 'temp_loss': 1.4451118976052044, 'temp': 0.9336591898000728, 'alpha_loss': 8.724755376403095, 'alpha': 0.9785262567606586, 'critic_loss': 115.84723815025642, 'actor_loss': 7.150968403844108, 'time_step': 0.026253305680570545, 'td_error': 23.9849441750882, 'init_value': -13.175863265991211, 'ave_value': -4.4130009672528985} step=1368
2022-04-20 18:34.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.11 [info     ] CQL_20220420183324: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00031344444431059544, 'time_algorithm_update': 0.025480908260010836, 'temp_loss': 0.9820388789936813, 'temp': 0.9220055357406014, 'alpha_loss': 11.472177838721471, 'alpha': 0.9357913708129124, 'critic_loss': 180.52369814867166, 'actor_loss': 10.227706541094864, 'time_step': 0.025897919783118176, 'td_error': 40.66873595621843, 'init_value': -20.415193557739258, 'ave_value': -7.345540317396323} step=1710
2022-04-20 18:34.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.19 [info     ] CQL_20220420183324: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003066774000201309, 'time_algorithm_update': 0.02410670330649928, 'temp_loss': 0.6344378885339227, 'temp': 0.9128761641811907, 'alpha_loss': 13.037495238042018, 'alpha': 0.8938994557536833, 'critic_loss': 264.47428233721104, 'actor_loss': 14.062253003929094, 'time_step': 0.02451418854339778, 'td_error': 49.58549573955333, 'init_value': -25.33762550354004, 'ave_value': -9.97867626099436} step=2052
2022-04-20 18:34.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.28 [info     ] CQL_20220420183324: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003111662223325138, 'time_algorithm_update': 0.024293005815026355, 'temp_loss': 0.30929262364250526, 'temp': 0.9064381486839719, 'alpha_loss': 15.22785520832441, 'alpha': 0.8549954967889172, 'critic_loss': 359.13108816760325, 'actor_loss': 18.490540518398173, 'time_step': 0.024704101489998444, 'td_error': 106.6760680037877, 'init_value': -36.79033660888672, 'ave_value': -14.01313855818263} step=2394
2022-04-20 18:34.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.37 [info     ] CQL_20220420183324: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003076694165056909, 'time_algorithm_update': 0.024214201503329806, 'temp_loss': 0.07320336360288293, 'temp': 0.9034069598766795, 'alpha_loss': 17.375827239967926, 'alpha': 0.8175727247494703, 'critic_loss': 485.39686655858804, 'actor_loss': 24.17207353714614, 'time_step': 0.02462265366002133, 'td_error': 207.19210720503762, 'init_value': -46.975013732910156, 'ave_value': -19.55377243571453} step=2736
2022-04-20 18:34.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.46 [info     ] CQL_20220420183324: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003128233011703045, 'time_algorithm_update': 0.02417769836403473, 'temp_loss': -0.04795946572956286, 'temp': 0.9031650037096258, 'alpha_loss': 19.267490718796935, 'alpha': 0.7823536298428363, 'critic_loss': 639.8021096569753, 'actor_loss': 30.63653752399467, 'time_step': 0.024592958695707264, 'td_error': 223.08735890404654, 'init_value': -58.21269607543945, 'ave_value': -25.387939446627556} step=3078
2022-04-20 18:34.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:34.55 [info     ] CQL_20220420183324: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003052420086330838, 'time_algorithm_update': 0.02410778664706046, 'temp_loss': -0.2170696337435024, 'temp': 0.906456022234688, 'alpha_loss': 23.8463332109284, 'alpha': 0.7485966696376689, 'critic_loss': 808.8278647974918, 'actor_loss': 38.227779957286096, 'time_step': 0.024512584446466458, 'td_error': 1098.443621033886, 'init_value': -78.42047119140625, 'ave_value': -33.927024399171} step=3420
2022-04-20 18:34.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.03 [info     ] CQL_20220420183324: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.000316628238611054, 'time_algorithm_update': 0.02402722277836493, 'temp_loss': -0.2371216749946712, 'temp': 0.9129852467810201, 'alpha_loss': 24.55846813687107, 'alpha': 0.7156260623569377, 'critic_loss': 1014.7144479138112, 'actor_loss': 46.54508943167346, 'time_step': 0.02444434235667625, 'td_error': 1019.8339116475305, 'init_value': -98.04225158691406, 'ave_value': -41.461043665065425} step=3762
2022-04-20 18:35.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.12 [info     ] CQL_20220420183324: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00030888939461512873, 'time_algorithm_update': 0.024414321135359202, 'temp_loss': -0.27221874275642355, 'temp': 0.9206011797252455, 'alpha_loss': 27.888411850957144, 'alpha': 0.6873195197498589, 'critic_loss': 1218.7501495539793, 'actor_loss': 54.815110714114894, 'time_step': 0.024824442222104434, 'td_error': 2277.835139438944, 'init_value': -114.2279281616211, 'ave_value': -50.53899375505812} step=4104
2022-04-20 18:35.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.21 [info     ] CQL_20220420183324: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003115203645494249, 'time_algorithm_update': 0.024383662039773504, 'temp_loss': -0.3012901971672188, 'temp': 0.9300685298373128, 'alpha_loss': 30.29354419206318, 'alpha': 0.6581890595238111, 'critic_loss': 1419.641448796144, 'actor_loss': 62.0854628267344, 'time_step': 0.024795729514451054, 'td_error': 1982.1767962428273, 'init_value': -126.96852111816406, 'ave_value': -54.90468869704384} step=4446
2022-04-20 18:35.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.30 [info     ] CQL_20220420183324: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003159506279125548, 'time_algorithm_update': 0.0245399977031507, 'temp_loss': -0.2965660664193027, 'temp': 0.9424496176298599, 'alpha_loss': 29.252521627827694, 'alpha': 0.632348192365546, 'critic_loss': 1605.2660090574743, 'actor_loss': 69.18445055108322, 'time_step': 0.02495861402031971, 'td_error': 3340.4650959300393, 'init_value': -143.57476806640625, 'ave_value': -60.32685356597106} step=4788
2022-04-20 18:35.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.39 [info     ] CQL_20220420183324: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003152081840916684, 'time_algorithm_update': 0.024404179283052857, 'temp_loss': -0.37229185009495036, 'temp': 0.956660983506699, 'alpha_loss': 36.34954873581379, 'alpha': 0.6086276163134658, 'critic_loss': 1777.296498438071, 'actor_loss': 75.65974378306963, 'time_step': 0.02482156725654825, 'td_error': 4009.93349602265, 'init_value': -163.85049438476562, 'ave_value': -67.91350043084171} step=5130
2022-04-20 18:35.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.47 [info     ] CQL_20220420183324: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003006946273714478, 'time_algorithm_update': 0.023211430387887342, 'temp_loss': -0.35015590183916145, 'temp': 0.971313135142912, 'alpha_loss': 38.06725436483907, 'alpha': 0.5845434702279275, 'critic_loss': 1954.5970673142817, 'actor_loss': 82.16739377919693, 'time_step': 0.023607408094127275, 'td_error': 4012.5448661573364, 'init_value': -183.6077880859375, 'ave_value': -73.5014479383531} step=5472
2022-04-20 18:35.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:35.55 [info     ] CQL_20220420183324: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0002799661535965769, 'time_algorithm_update': 0.02141620471463566, 'temp_loss': -0.3399816666571805, 'temp': 0.9891516456478521, 'alpha_loss': 40.674519552822, 'alpha': 0.5617178415345867, 'critic_loss': 2129.104465774625, 'actor_loss': 88.92256135550159, 'time_step': 0.02178481308340329, 'td_error': 10744.315168033922, 'init_value': -209.99868774414062, 'ave_value': -80.76022978349312} step=5814
2022-04-20 18:35.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.04 [info     ] CQL_20220420183324: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003103282716539171, 'time_algorithm_update': 0.023923552524276644, 'temp_loss': -0.3293866464726583, 'temp': 1.0059106679339158, 'alpha_loss': 29.9231984364359, 'alpha': 0.5424883822251482, 'critic_loss': 2292.1210509183115, 'actor_loss': 93.42507026627747, 'time_step': 0.024333767026488543, 'td_error': 2735.3149976076234, 'init_value': -215.1345672607422, 'ave_value': -81.58353338812923} step=6156
2022-04-20 18:36.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.13 [info     ] CQL_20220420183324: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00030251762323212203, 'time_algorithm_update': 0.023966349356355724, 'temp_loss': -0.2956006067821331, 'temp': 1.0229755960709868, 'alpha_loss': 26.6101580063502, 'alpha': 0.5283865360488669, 'critic_loss': 2410.8813608626874, 'actor_loss': 97.31577352334185, 'time_step': 0.02436829937828912, 'td_error': 3291.738567848746, 'init_value': -228.80911254882812, 'ave_value': -85.47714011641773} step=6498
2022-04-20 18:36.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.21 [info     ] CQL_20220420183324: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003069304583365457, 'time_algorithm_update': 0.024257780515659622, 'temp_loss': -0.29250546729965515, 'temp': 1.0384443600972493, 'alpha_loss': 27.296799185331803, 'alpha': 0.5143080071399087, 'critic_loss': 2513.411836322985, 'actor_loss': 100.99268626609044, 'time_step': 0.024662882961027803, 'td_error': 3952.681442436787, 'init_value': -244.8936767578125, 'ave_value': -89.79877692254277} step=6840
2022-04-20 18:36.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.30 [info     ] CQL_20220420183324: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003111306686847531, 'time_algorithm_update': 0.024171653546785053, 'temp_loss': -0.29744091547197765, 'temp': 1.056144154211234, 'alpha_loss': 29.074064769242938, 'alpha': 0.49920526600023457, 'critic_loss': 2613.51805676912, 'actor_loss': 104.81772519831071, 'time_step': 0.024587380955790914, 'td_error': 5177.241566973192, 'init_value': -261.8665771484375, 'ave_value': -95.620871774173} step=7182
2022-04-20 18:36.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.39 [info     ] CQL_20220420183324: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003011373051425867, 'time_algorithm_update': 0.02459006002771924, 'temp_loss': -0.26570098226269084, 'temp': 1.0733472297066136, 'alpha_loss': 25.520058503450706, 'alpha': 0.48449032444354384, 'critic_loss': 2714.52058990657, 'actor_loss': 107.96000682540804, 'time_step': 0.024992546840020786, 'td_error': 5455.1455948845105, 'init_value': -269.38739013671875, 'ave_value': -95.56848919838936} step=7524
2022-04-20 18:36.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.49 [info     ] CQL_20220420183324: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003064341015285916, 'time_algorithm_update': 0.025836614140293056, 'temp_loss': -0.20128214347846152, 'temp': 1.0877274863901194, 'alpha_loss': 24.694856148714212, 'alpha': 0.47134938043111946, 'critic_loss': 2796.592929773163, 'actor_loss': 110.91549013372054, 'time_step': 0.026242035871360734, 'td_error': 5216.722449766122, 'init_value': -284.9485778808594, 'ave_value': -99.55860518121504} step=7866
2022-04-20 18:36.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:36.58 [info     ] CQL_20220420183324: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003140014514588473, 'time_algorithm_update': 0.02559706830141837, 'temp_loss': -0.18428033122476953, 'temp': 1.1007107025698613, 'alpha_loss': 22.938901016586705, 'alpha': 0.45897043082449174, 'critic_loss': 2872.7829525596217, 'actor_loss': 113.47076670328777, 'time_step': 0.026013269061930695, 'td_error': 6657.704700102084, 'init_value': -287.1576232910156, 'ave_value': -98.59159582967156} step=8208
2022-04-20 18:36.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.08 [info     ] CQL_20220420183324: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000312643441540456, 'time_algorithm_update': 0.0262800371437742, 'temp_loss': -0.17259325126284047, 'temp': 1.1136125458611383, 'alpha_loss': 24.42961012759404, 'alpha': 0.44616470847562045, 'critic_loss': 2947.8505352533352, 'actor_loss': 116.27968474717169, 'time_step': 0.026694871528803954, 'td_error': 5090.20571264412, 'init_value': -291.47119140625, 'ave_value': -99.68847318181047} step=8550
2022-04-20 18:37.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.17 [info     ] CQL_20220420183324: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00031291532237627355, 'time_algorithm_update': 0.025968783083017807, 'temp_loss': -0.1686587860884025, 'temp': 1.123496108933499, 'alpha_loss': 22.788330811854692, 'alpha': 0.43373421740810775, 'critic_loss': 3019.918317828262, 'actor_loss': 118.6813865126225, 'time_step': 0.026384535588716205, 'td_error': 12298.94724304547, 'init_value': -309.30694580078125, 'ave_value': -102.37957612827017} step=8892
2022-04-20 18:37.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.26 [info     ] CQL_20220420183324: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.000309021152250948, 'time_algorithm_update': 0.025656874416864407, 'temp_loss': -0.18501307568049918, 'temp': 1.1375414129586248, 'alpha_loss': 28.177278037308252, 'alpha': 0.420940994432098, 'critic_loss': 3087.7127806903327, 'actor_loss': 121.48278237504569, 'time_step': 0.02606934692427429, 'td_error': 9389.849715677232, 'init_value': -310.1742248535156, 'ave_value': -104.255386440421} step=9234
2022-04-20 18:37.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.36 [info     ] CQL_20220420183324: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00031797370018317683, 'time_algorithm_update': 0.026226283514011673, 'temp_loss': -0.09190155943714039, 'temp': 1.1489889973785445, 'alpha_loss': 18.73466559355719, 'alpha': 0.40809008357120535, 'critic_loss': 3165.963148328993, 'actor_loss': 123.71721680401362, 'time_step': 0.026645942738181667, 'td_error': 2461.8741518124657, 'init_value': -328.37481689453125, 'ave_value': -108.715291791961} step=9576
2022-04-20 18:37.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.45 [info     ] CQL_20220420183324: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003143541994150619, 'time_algorithm_update': 0.02590566699267828, 'temp_loss': -0.030213291108695386, 'temp': 1.1518980693398861, 'alpha_loss': 14.07444511850675, 'alpha': 0.400470028233807, 'critic_loss': 3229.179726762381, 'actor_loss': 125.53006092986168, 'time_step': 0.026321839867976673, 'td_error': 1130.7893428609445, 'init_value': -332.21026611328125, 'ave_value': -109.78822859579378} step=9918
2022-04-20 18:37.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:37.55 [info     ] CQL_20220420183324: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00030928606178328306, 'time_algorithm_update': 0.025952828557867753, 'temp_loss': -0.030944163774886325, 'temp': 1.1555076980451395, 'alpha_loss': 13.462496960372256, 'alpha': 0.39349927888279074, 'critic_loss': 3278.3859570598042, 'actor_loss': 127.21271338658026, 'time_step': 0.02636249511562593, 'td_error': 3185.3395288187044, 'init_value': -334.5638122558594, 'ave_value': -109.31044854762318} step=10260
2022-04-20 18:37.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.04 [info     ] CQL_20220420183324: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00031241269139518517, 'time_algorithm_update': 0.025789045450980205, 'temp_loss': -0.04919828420058329, 'temp': 1.1582310799269648, 'alpha_loss': 16.50093367321101, 'alpha': 0.38434250234511863, 'critic_loss': 3327.9474669339365, 'actor_loss': 129.0377474779274, 'time_step': 0.02620500221587064, 'td_error': 3305.573841711499, 'init_value': -348.3600769042969, 'ave_value': -113.56208054706858} step=10602
2022-04-20 18:38.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.14 [info     ] CQL_20220420183324: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00031462468599018295, 'time_algorithm_update': 0.026058053412632634, 'temp_loss': -0.025086371968799863, 'temp': 1.1607098032159415, 'alpha_loss': 16.250563307115208, 'alpha': 0.3740098889459643, 'critic_loss': 3379.78019134343, 'actor_loss': 130.99960507844625, 'time_step': 0.026473745965121084, 'td_error': 2595.4813124884936, 'init_value': -351.693115234375, 'ave_value': -113.58802590640815} step=10944
2022-04-20 18:38.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.23 [info     ] CQL_20220420183324: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00031664566686976026, 'time_algorithm_update': 0.0260298210277892, 'temp_loss': -0.04433565660875443, 'temp': 1.1629188280356557, 'alpha_loss': 18.185168950006975, 'alpha': 0.36371874051135883, 'critic_loss': 3428.7044884754205, 'actor_loss': 132.50863705462183, 'time_step': 0.02645002192223978, 'td_error': 3255.541840854008, 'init_value': -353.43743896484375, 'ave_value': -114.8194576786982} step=11286
2022-04-20 18:38.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.33 [info     ] CQL_20220420183324: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003142001336080986, 'time_algorithm_update': 0.025972972139280442, 'temp_loss': 0.023889406153943107, 'temp': 1.1651069166367514, 'alpha_loss': 14.93125362197558, 'alpha': 0.35332422899572474, 'critic_loss': 3475.9463947139984, 'actor_loss': 133.93049918280707, 'time_step': 0.0263915110749808, 'td_error': 3790.202235395145, 'init_value': -358.02166748046875, 'ave_value': -114.84770129563572} step=11628
2022-04-20 18:38.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.42 [info     ] CQL_20220420183324: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003220470328080027, 'time_algorithm_update': 0.025992772035431443, 'temp_loss': 0.033593085392961027, 'temp': 1.1623548318070975, 'alpha_loss': 10.945391255512572, 'alpha': 0.34518040533651384, 'critic_loss': 3518.1451737253287, 'actor_loss': 135.2285599959524, 'time_step': 0.026420506120425218, 'td_error': 1141.762241019498, 'init_value': -366.45330810546875, 'ave_value': -118.99124628366651} step=11970
2022-04-20 18:38.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:38.51 [info     ] CQL_20220420183324: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00031764883744089226, 'time_algorithm_update': 0.025940999650118642, 'temp_loss': -0.0029912788301096325, 'temp': 1.159765600809577, 'alpha_loss': 9.361331500505147, 'alpha': 0.3387776234170847, 'critic_loss': 3557.0421228241503, 'actor_loss': 136.32772822686803, 'time_step': 0.0263637415846886, 'td_error': 563.9198102942257, 'init_value': -370.4807434082031, 'ave_value': -119.6457773138811} step=12312
2022-04-20 18:38.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.01 [info     ] CQL_20220420183324: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00031659407922398974, 'time_algorithm_update': 0.026154626182645385, 'temp_loss': 0.063933299364228, 'temp': 1.157500027564534, 'alpha_loss': 9.7131296821505, 'alpha': 0.33184920547649877, 'critic_loss': 3594.882419162326, 'actor_loss': 137.53878623560854, 'time_step': 0.02657575495758949, 'td_error': 926.1216763973454, 'init_value': -373.26416015625, 'ave_value': -120.63275985120653} step=12654
2022-04-20 18:39.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.11 [info     ] CQL_20220420183324: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00031615767562598513, 'time_algorithm_update': 0.02608268093644527, 'temp_loss': 0.0626480859644406, 'temp': 1.1526341584690831, 'alpha_loss': 9.395478611801103, 'alpha': 0.32413723794688953, 'critic_loss': 3629.17894294248, 'actor_loss': 138.75564907587062, 'time_step': 0.02650485192126001, 'td_error': 691.0009451942344, 'init_value': -372.28582763671875, 'ave_value': -119.18532283344784} step=12996
2022-04-20 18:39.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.20 [info     ] CQL_20220420183324: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00031811940042596117, 'time_algorithm_update': 0.02607929427721347, 'temp_loss': 0.003010103380993793, 'temp': 1.1501021597817627, 'alpha_loss': 9.080508846288536, 'alpha': 0.31670741048472667, 'critic_loss': 3655.9630069844206, 'actor_loss': 139.46351607919436, 'time_step': 0.026498749242191425, 'td_error': 642.7753602446415, 'init_value': -373.38897705078125, 'ave_value': -119.39332169932288} step=13338
2022-04-20 18:39.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.29 [info     ] CQL_20220420183324: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003093976026390031, 'time_algorithm_update': 0.026117980828759265, 'temp_loss': 0.04117993794229121, 'temp': 1.147931240804014, 'alpha_loss': 9.535497086089954, 'alpha': 0.30855406350211095, 'critic_loss': 3684.6852541917947, 'actor_loss': 140.39232581958436, 'time_step': 0.026529871929458708, 'td_error': 2127.7032248750693, 'init_value': -377.2516174316406, 'ave_value': -121.13681873796223} step=13680
2022-04-20 18:39.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.39 [info     ] CQL_20220420183324: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00031391431016531606, 'time_algorithm_update': 0.025603313892208344, 'temp_loss': 0.06500314939658196, 'temp': 1.144311399139159, 'alpha_loss': 8.926396952735054, 'alpha': 0.3005044349627188, 'critic_loss': 3711.3523948624816, 'actor_loss': 141.31395984671965, 'time_step': 0.02601909149459928, 'td_error': 1068.0622869019548, 'init_value': -384.26116943359375, 'ave_value': -125.03767246549194} step=14022
2022-04-20 18:39.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.48 [info     ] CQL_20220420183324: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003072344071683828, 'time_algorithm_update': 0.02422190130802623, 'temp_loss': 0.08812617338997752, 'temp': 1.1379744054978354, 'alpha_loss': 8.4427941431079, 'alpha': 0.29250978369113295, 'critic_loss': 3735.962318821957, 'actor_loss': 141.94618633337188, 'time_step': 0.02462988499312373, 'td_error': 1671.0285899228998, 'init_value': -380.6434631347656, 'ave_value': -124.04133012191669} step=14364
2022-04-20 18:39.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:39.56 [info     ] CQL_20220420183324: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003131997515583596, 'time_algorithm_update': 0.024088984344437805, 'temp_loss': 0.05953768994768112, 'temp': 1.1315660619596293, 'alpha_loss': 7.888753041189316, 'alpha': 0.28495944060428796, 'critic_loss': 3757.9190838016266, 'actor_loss': 142.59538753130283, 'time_step': 0.024503222683019805, 'td_error': 1712.1093980081387, 'init_value': -380.7784118652344, 'ave_value': -122.16141424050203} step=14706
2022-04-20 18:39.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.05 [info     ] CQL_20220420183324: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00031001247160615975, 'time_algorithm_update': 0.023918017309311538, 'temp_loss': 0.10385584355228478, 'temp': 1.1247777841244524, 'alpha_loss': 7.382820967693775, 'alpha': 0.2777211415140252, 'critic_loss': 3780.7118098387245, 'actor_loss': 143.38155621534202, 'time_step': 0.024326748318142362, 'td_error': 1477.3753729383548, 'init_value': -379.2746276855469, 'ave_value': -120.51666186034143} step=15048
2022-04-20 18:40.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.14 [info     ] CQL_20220420183324: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003064870834350586, 'time_algorithm_update': 0.02404231704466524, 'temp_loss': 0.0894604560902767, 'temp': 1.1174715976966054, 'alpha_loss': 7.719336764157167, 'alpha': 0.2701762319482558, 'critic_loss': 3808.5224280998723, 'actor_loss': 144.3336079692283, 'time_step': 0.024453976000958715, 'td_error': 1294.522791923595, 'init_value': -386.9625549316406, 'ave_value': -124.83114803000613} step=15390
2022-04-20 18:40.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.23 [info     ] CQL_20220420183324: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00030883850409970645, 'time_algorithm_update': 0.024055450980426274, 'temp_loss': 0.12414961595807159, 'temp': 1.1072656184609173, 'alpha_loss': 6.804545254735221, 'alpha': 0.26300958591943596, 'critic_loss': 3833.0518948739036, 'actor_loss': 144.9192022133989, 'time_step': 0.024468176546152572, 'td_error': 1508.4598859731109, 'init_value': -391.3359680175781, 'ave_value': -125.14651573174709} step=15732
2022-04-20 18:40.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.31 [info     ] CQL_20220420183324: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003095196004499469, 'time_algorithm_update': 0.02324746993550083, 'temp_loss': 0.10395300993423538, 'temp': 1.09835846451988, 'alpha_loss': 6.878227273361725, 'alpha': 0.25602997586741083, 'critic_loss': 3856.6823387815243, 'actor_loss': 145.77366020247254, 'time_step': 0.023655627903185393, 'td_error': 1500.0496397238778, 'init_value': -389.25115966796875, 'ave_value': -125.14215317938779} step=16074
2022-04-20 18:40.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.40 [info     ] CQL_20220420183324: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003097782358091477, 'time_algorithm_update': 0.024154921024166352, 'temp_loss': 0.10032158729495012, 'temp': 1.0913361290044952, 'alpha_loss': 6.41216482405077, 'alpha': 0.24908097197263562, 'critic_loss': 3873.437016001919, 'actor_loss': 146.23933995118614, 'time_step': 0.024566146365383214, 'td_error': 895.338621520445, 'init_value': -390.4554748535156, 'ave_value': -125.10518629016103} step=16416
2022-04-20 18:40.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.49 [info     ] CQL_20220420183324: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00030346362911469753, 'time_algorithm_update': 0.02404481555983337, 'temp_loss': 0.061538682591540425, 'temp': 1.0828109850660401, 'alpha_loss': 5.773838317882248, 'alpha': 0.2424259418022563, 'critic_loss': 3884.810828850283, 'actor_loss': 146.51289273981462, 'time_step': 0.024449840623732896, 'td_error': 494.14700155718344, 'init_value': -390.3812561035156, 'ave_value': -125.57673116134093} step=16758
2022-04-20 18:40.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:40.57 [info     ] CQL_20220420183324: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003070273594549525, 'time_algorithm_update': 0.02402464130468536, 'temp_loss': 0.06875877961385668, 'temp': 1.0775097702678882, 'alpha_loss': 5.52023405464072, 'alpha': 0.23652962516797216, 'critic_loss': 3894.8118967870523, 'actor_loss': 146.94028283280934, 'time_step': 0.02443449260198582, 'td_error': 960.13919737913, 'init_value': -388.6368103027344, 'ave_value': -123.99848808649423} step=17100
2022-04-20 18:40.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420183324/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:40.58 [info     ] FQE_20220420184058: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016291601112089962, 'time_algorithm_update': 0.0035112852073577515, 'loss': 0.007804882858532021, 'time_step': 0.003745570240250553, 'init_value': -0.10520291328430176, 'ave_value': -0.05788522946395542, 'soft_opc': nan} step=166




2022-04-20 18:40.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:40.59 [info     ] FQE_20220420184058: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016100004494908345, 'time_algorithm_update': 0.003328838980341532, 'loss': 0.005950665818134615, 'time_step': 0.0035633911569434478, 'init_value': -0.24457888305187225, 'ave_value': -0.1641203038911503, 'soft_opc': nan} step=332




2022-04-20 18:40.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.00 [info     ] FQE_20220420184058: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016073146498346902, 'time_algorithm_update': 0.0035352893622524768, 'loss': 0.005404116094942732, 'time_step': 0.0037620670824165805, 'init_value': -0.30119186639785767, 'ave_value': -0.19629217611750596, 'soft_opc': nan} step=498




2022-04-20 18:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.00 [info     ] FQE_20220420184058: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015922339565782663, 'time_algorithm_update': 0.0033091780651046568, 'loss': 0.005215995650491621, 'time_step': 0.0035406853779252753, 'init_value': -0.3658562898635864, 'ave_value': -0.2240658748736563, 'soft_opc': nan} step=664




2022-04-20 18:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.01 [info     ] FQE_20220420184058: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016119681208966727, 'time_algorithm_update': 0.0034495649567569593, 'loss': 0.004690516418889614, 'time_step': 0.0036812805267701664, 'init_value': -0.39767134189605713, 'ave_value': -0.22224255930008324, 'soft_opc': nan} step=830




2022-04-20 18:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.02 [info     ] FQE_20220420184058: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001602302114647555, 'time_algorithm_update': 0.0033894920923623695, 'loss': 0.004252388848956809, 'time_step': 0.003619622035198901, 'init_value': -0.4474526643753052, 'ave_value': -0.2482902815453771, 'soft_opc': nan} step=996




2022-04-20 18:41.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.02 [info     ] FQE_20220420184058: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015696847295186607, 'time_algorithm_update': 0.003451934779982969, 'loss': 0.004190681265885885, 'time_step': 0.003673457237611334, 'init_value': -0.5222886800765991, 'ave_value': -0.2807531341794996, 'soft_opc': nan} step=1162




2022-04-20 18:41.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.03 [info     ] FQE_20220420184058: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001636571194752153, 'time_algorithm_update': 0.003328037549214191, 'loss': 0.003955639966101532, 'time_step': 0.0035559843821697927, 'init_value': -0.5831882953643799, 'ave_value': -0.30808944142557815, 'soft_opc': nan} step=1328




2022-04-20 18:41.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.04 [info     ] FQE_20220420184058: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016067257846694394, 'time_algorithm_update': 0.0034907424306295006, 'loss': 0.003915144068873431, 'time_step': 0.0037237147250807428, 'init_value': -0.6520153284072876, 'ave_value': -0.34155689530013766, 'soft_opc': nan} step=1494




2022-04-20 18:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.04 [info     ] FQE_20220420184058: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016038532716682158, 'time_algorithm_update': 0.0034143565649009614, 'loss': 0.004017572720950267, 'time_step': 0.003643643425171634, 'init_value': -0.7416729927062988, 'ave_value': -0.39559078595458386, 'soft_opc': nan} step=1660




2022-04-20 18:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.05 [info     ] FQE_20220420184058: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015785464321274356, 'time_algorithm_update': 0.003526413297078696, 'loss': 0.003914520904668663, 'time_step': 0.003757258495652532, 'init_value': -0.8167471885681152, 'ave_value': -0.42623676275907857, 'soft_opc': nan} step=1826




2022-04-20 18:41.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.06 [info     ] FQE_20220420184058: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016004062560667475, 'time_algorithm_update': 0.0034389237323439264, 'loss': 0.0038236464788935273, 'time_step': 0.003667877381106457, 'init_value': -0.8778480291366577, 'ave_value': -0.45554816114489693, 'soft_opc': nan} step=1992




2022-04-20 18:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.06 [info     ] FQE_20220420184058: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015758893576013036, 'time_algorithm_update': 0.0034082539110298618, 'loss': 0.003969739684947284, 'time_step': 0.003637599657817059, 'init_value': -0.9268956184387207, 'ave_value': -0.47997591006307366, 'soft_opc': nan} step=2158




2022-04-20 18:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.07 [info     ] FQE_20220420184058: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016240757631968302, 'time_algorithm_update': 0.0035544145538146236, 'loss': 0.004217317952712751, 'time_step': 0.003790868334023349, 'init_value': -1.0453613996505737, 'ave_value': -0.5573548224416922, 'soft_opc': nan} step=2324




2022-04-20 18:41.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.08 [info     ] FQE_20220420184058: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015585968293339373, 'time_algorithm_update': 0.0034281805337193503, 'loss': 0.004452490866330105, 'time_step': 0.003652071378317224, 'init_value': -1.1068751811981201, 'ave_value': -0.5837283948912528, 'soft_opc': nan} step=2490




2022-04-20 18:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.08 [info     ] FQE_20220420184058: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001567329268857657, 'time_algorithm_update': 0.0034283370856779166, 'loss': 0.00485173829042925, 'time_step': 0.0036537790872964515, 'init_value': -1.233515977859497, 'ave_value': -0.6720739436484789, 'soft_opc': nan} step=2656




2022-04-20 18:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.09 [info     ] FQE_20220420184058: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001626474311552852, 'time_algorithm_update': 0.0033655726766011803, 'loss': 0.005197240969626211, 'time_step': 0.003597512302628483, 'init_value': -1.3149051666259766, 'ave_value': -0.7246117323970522, 'soft_opc': nan} step=2822




2022-04-20 18:41.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.10 [info     ] FQE_20220420184058: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016209303614604905, 'time_algorithm_update': 0.0034755468368530273, 'loss': 0.005436426888399544, 'time_step': 0.0037084487547357397, 'init_value': -1.403795838356018, 'ave_value': -0.786770499116523, 'soft_opc': nan} step=2988




2022-04-20 18:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.10 [info     ] FQE_20220420184058: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015914583780679358, 'time_algorithm_update': 0.0034443183117602244, 'loss': 0.005949140966768904, 'time_step': 0.00367048993168107, 'init_value': -1.433767557144165, 'ave_value': -0.7844010799005208, 'soft_opc': nan} step=3154




2022-04-20 18:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.11 [info     ] FQE_20220420184058: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015699288931237646, 'time_algorithm_update': 0.003447636064276638, 'loss': 0.006200002660660679, 'time_step': 0.003677115382918392, 'init_value': -1.524758219718933, 'ave_value': -0.8406356759325324, 'soft_opc': nan} step=3320




2022-04-20 18:41.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.12 [info     ] FQE_20220420184058: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015841047447848032, 'time_algorithm_update': 0.003349057163100645, 'loss': 0.006467546337054123, 'time_step': 0.003576746906142637, 'init_value': -1.5622625350952148, 'ave_value': -0.8667919085888078, 'soft_opc': nan} step=3486




2022-04-20 18:41.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.12 [info     ] FQE_20220420184058: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015956378844847162, 'time_algorithm_update': 0.003467825521905738, 'loss': 0.007050786539070667, 'time_step': 0.003693718508065465, 'init_value': -1.6285080909729004, 'ave_value': -0.881557914725272, 'soft_opc': nan} step=3652




2022-04-20 18:41.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.13 [info     ] FQE_20220420184058: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001579839062977986, 'time_algorithm_update': 0.003249004662755024, 'loss': 0.007494102241275047, 'time_step': 0.003476394228188388, 'init_value': -1.6725589036941528, 'ave_value': -0.8788184619086022, 'soft_opc': nan} step=3818




2022-04-20 18:41.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.14 [info     ] FQE_20220420184058: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015849808612501765, 'time_algorithm_update': 0.0033813011215393803, 'loss': 0.008011627411091116, 'time_step': 0.0036108996494706854, 'init_value': -1.780316710472107, 'ave_value': -0.9539876464495817, 'soft_opc': nan} step=3984




2022-04-20 18:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.14 [info     ] FQE_20220420184058: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015869054449609964, 'time_algorithm_update': 0.0033209309520491636, 'loss': 0.008746516951135394, 'time_step': 0.0035466056272207974, 'init_value': -1.9262324571609497, 'ave_value': -1.0633342341263463, 'soft_opc': nan} step=4150




2022-04-20 18:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.15 [info     ] FQE_20220420184058: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015823237867240445, 'time_algorithm_update': 0.003513492733599192, 'loss': 0.008795185985200736, 'time_step': 0.003738959151578237, 'init_value': -1.9992055892944336, 'ave_value': -1.1075384969238364, 'soft_opc': nan} step=4316




2022-04-20 18:41.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.16 [info     ] FQE_20220420184058: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015598320099244635, 'time_algorithm_update': 0.003378422863512154, 'loss': 0.00961815778527925, 'time_step': 0.0036052594701927827, 'init_value': -2.0737287998199463, 'ave_value': -1.149439451397189, 'soft_opc': nan} step=4482




2022-04-20 18:41.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.16 [info     ] FQE_20220420184058: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015811029686985245, 'time_algorithm_update': 0.003560821694063853, 'loss': 0.01009015955103853, 'time_step': 0.0037910406848034226, 'init_value': -2.1071317195892334, 'ave_value': -1.1320467339959708, 'soft_opc': nan} step=4648




2022-04-20 18:41.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.17 [info     ] FQE_20220420184058: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001601684524352292, 'time_algorithm_update': 0.003298061439789921, 'loss': 0.010488120901415193, 'time_step': 0.0035283737872020306, 'init_value': -2.183290481567383, 'ave_value': -1.1792762929846339, 'soft_opc': nan} step=4814




2022-04-20 18:41.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.18 [info     ] FQE_20220420184058: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015926217458334314, 'time_algorithm_update': 0.003452843930347856, 'loss': 0.011063418899528143, 'time_step': 0.003683877278523273, 'init_value': -2.2802176475524902, 'ave_value': -1.2257725455899942, 'soft_opc': nan} step=4980




2022-04-20 18:41.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.18 [info     ] FQE_20220420184058: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015687942504882812, 'time_algorithm_update': 0.003416304128715791, 'loss': 0.011676988418561867, 'time_step': 0.0036442394716193877, 'init_value': -2.3540332317352295, 'ave_value': -1.2323005835469056, 'soft_opc': nan} step=5146




2022-04-20 18:41.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.19 [info     ] FQE_20220420184058: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.000158297010214932, 'time_algorithm_update': 0.0035046008696039037, 'loss': 0.012165939744634846, 'time_step': 0.003733981086547116, 'init_value': -2.3605031967163086, 'ave_value': -1.200894529839793, 'soft_opc': nan} step=5312




2022-04-20 18:41.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.20 [info     ] FQE_20220420184058: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.000170299805790545, 'time_algorithm_update': 0.0034355829997235036, 'loss': 0.012893354701837638, 'time_step': 0.0036798528878085584, 'init_value': -2.4117789268493652, 'ave_value': -1.2542216095307774, 'soft_opc': nan} step=5478




2022-04-20 18:41.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.20 [info     ] FQE_20220420184058: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016400182103536217, 'time_algorithm_update': 0.003495010984949319, 'loss': 0.013829015095084232, 'time_step': 0.003732302102697901, 'init_value': -2.5911645889282227, 'ave_value': -1.405367353541078, 'soft_opc': nan} step=5644




2022-04-20 18:41.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.21 [info     ] FQE_20220420184058: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016092823212405285, 'time_algorithm_update': 0.0034426192203200006, 'loss': 0.01425383255623438, 'time_step': 0.003673020615635148, 'init_value': -2.7212905883789062, 'ave_value': -1.4674607306414678, 'soft_opc': nan} step=5810




2022-04-20 18:41.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.22 [info     ] FQE_20220420184058: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016318602734301463, 'time_algorithm_update': 0.003462116402315806, 'loss': 0.015390110407613712, 'time_step': 0.003698068929005818, 'init_value': -2.7365713119506836, 'ave_value': -1.4273524388617957, 'soft_opc': nan} step=5976




2022-04-20 18:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.22 [info     ] FQE_20220420184058: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015985822103109704, 'time_algorithm_update': 0.003367044839514307, 'loss': 0.015531586297978479, 'time_step': 0.0035974649061639623, 'init_value': -2.773496150970459, 'ave_value': -1.424849388527428, 'soft_opc': nan} step=6142




2022-04-20 18:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.23 [info     ] FQE_20220420184058: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015917599919330642, 'time_algorithm_update': 0.003525449568966785, 'loss': 0.01602350903046598, 'time_step': 0.003757528511874647, 'init_value': -2.8538553714752197, 'ave_value': -1.4529166099646333, 'soft_opc': nan} step=6308




2022-04-20 18:41.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.24 [info     ] FQE_20220420184058: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001599501414471362, 'time_algorithm_update': 0.003379158226840467, 'loss': 0.016638985243121004, 'time_step': 0.0036056889108864658, 'init_value': -2.886395215988159, 'ave_value': -1.4703417116205568, 'soft_opc': nan} step=6474




2022-04-20 18:41.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.25 [info     ] FQE_20220420184058: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001610546226961067, 'time_algorithm_update': 0.0034537717520472513, 'loss': 0.017411537946718014, 'time_step': 0.003684346934398973, 'init_value': -2.9918551445007324, 'ave_value': -1.553012234658928, 'soft_opc': nan} step=6640




2022-04-20 18:41.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.25 [info     ] FQE_20220420184058: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001611767044986587, 'time_algorithm_update': 0.003434356436671981, 'loss': 0.01748906751864601, 'time_step': 0.0036624397139951408, 'init_value': -3.034579038619995, 'ave_value': -1.6119407588201742, 'soft_opc': nan} step=6806




2022-04-20 18:41.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.26 [info     ] FQE_20220420184058: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015983524092708724, 'time_algorithm_update': 0.0035369511110236846, 'loss': 0.0184264311487001, 'time_step': 0.0037662767502198735, 'init_value': -3.1185879707336426, 'ave_value': -1.6335484639004707, 'soft_opc': nan} step=6972




2022-04-20 18:41.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.27 [info     ] FQE_20220420184058: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001612628798886954, 'time_algorithm_update': 0.0034365553453744174, 'loss': 0.02013671543401475, 'time_step': 0.0036677237016608916, 'init_value': -3.13783597946167, 'ave_value': -1.6439843568277046, 'soft_opc': nan} step=7138




2022-04-20 18:41.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.27 [info     ] FQE_20220420184058: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001607400825224727, 'time_algorithm_update': 0.0034498349729790746, 'loss': 0.019958603056989252, 'time_step': 0.003678858998310135, 'init_value': -3.218287229537964, 'ave_value': -1.6801267877644817, 'soft_opc': nan} step=7304




2022-04-20 18:41.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.28 [info     ] FQE_20220420184058: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016026468162077018, 'time_algorithm_update': 0.00350416999265372, 'loss': 0.019711944713616317, 'time_step': 0.0037326295691800407, 'init_value': -3.260310173034668, 'ave_value': -1.6941158139082364, 'soft_opc': nan} step=7470




2022-04-20 18:41.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.29 [info     ] FQE_20220420184058: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001718926142497235, 'time_algorithm_update': 0.003548729850585202, 'loss': 0.02098615906139589, 'time_step': 0.0037920546818928547, 'init_value': -3.2935049533843994, 'ave_value': -1.7041705950847639, 'soft_opc': nan} step=7636




2022-04-20 18:41.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.29 [info     ] FQE_20220420184058: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001582855201629271, 'time_algorithm_update': 0.0034230775143726765, 'loss': 0.021020548541592545, 'time_step': 0.003651208188160356, 'init_value': -3.4453470706939697, 'ave_value': -1.8283032471802807, 'soft_opc': nan} step=7802




2022-04-20 18:41.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.30 [info     ] FQE_20220420184058: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016225246061761696, 'time_algorithm_update': 0.0035718435264495483, 'loss': 0.023005527218366153, 'time_step': 0.003805513841560088, 'init_value': -3.4195926189422607, 'ave_value': -1.7730996700370818, 'soft_opc': nan} step=7968




2022-04-20 18:41.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.31 [info     ] FQE_20220420184058: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016023739274725857, 'time_algorithm_update': 0.0034891295145793133, 'loss': 0.0230014634701558, 'time_step': 0.0037213520831372365, 'init_value': -3.4964661598205566, 'ave_value': -1.837494781432539, 'soft_opc': nan} step=8134




2022-04-20 18:41.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:41.31 [info     ] FQE_20220420184058: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016141368682125965, 'time_algorithm_update': 0.0034397826137312925, 'loss': 0.024396778585063856, 'time_step': 0.003669039312615452, 'init_value': -3.6143293380737305, 'ave_value': -1.9092466557386902, 'soft_opc': nan} step=8300




2022-04-20 18:41.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184058/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 18:41.32 [info     ] Directory is created at d3rlpy_logs/FQE_20220420184132
2022-04-20 18:41.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:41.32 [debug    ] Building models...
2022-04-20 18:41.32 [debug    ] Models have been built.
2022-04-20 18:41.32 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420184132/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:41.33 [info     ] FQE_20220420184132: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00015592172112263424, 'time_algorithm_update': 0.003432559967041016, 'loss': 0.024503165255235115, 'time_step': 0.0036604310425234514, 'init_value': -1.0062607526779175, 'ave_value': -1.0068578767346905, 'soft_opc': nan} step=355




2022-04-20 18:41.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.35 [info     ] FQE_20220420184132: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00015841873598770358, 'time_algorithm_update': 0.003483106049013809, 'loss': 0.023327228925387623, 'time_step': 0.003713085282016808, 'init_value': -2.2680094242095947, 'ave_value': -2.2796383781476064, 'soft_opc': nan} step=710




2022-04-20 18:41.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.36 [info     ] FQE_20220420184132: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00015809972521284936, 'time_algorithm_update': 0.00338359685011313, 'loss': 0.025634109785019513, 'time_step': 0.003608563248540314, 'init_value': -3.010601043701172, 'ave_value': -3.0038651659528566, 'soft_opc': nan} step=1065




2022-04-20 18:41.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.37 [info     ] FQE_20220420184132: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016307360689404984, 'time_algorithm_update': 0.0034965803925420197, 'loss': 0.030188233628344368, 'time_step': 0.0037351158303274234, 'init_value': -4.110657215118408, 'ave_value': -4.152427532313873, 'soft_opc': nan} step=1420




2022-04-20 18:41.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.39 [info     ] FQE_20220420184132: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.000161064174813284, 'time_algorithm_update': 0.0034186343072165905, 'loss': 0.03466158454424478, 'time_step': 0.0036543449885408645, 'init_value': -4.702373027801514, 'ave_value': -4.7754077495908795, 'soft_opc': nan} step=1775




2022-04-20 18:41.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.40 [info     ] FQE_20220420184132: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016005542916311345, 'time_algorithm_update': 0.0034249157972738777, 'loss': 0.042553179619282906, 'time_step': 0.003657573377582389, 'init_value': -5.667094707489014, 'ave_value': -5.799000880678341, 'soft_opc': nan} step=2130




2022-04-20 18:41.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.42 [info     ] FQE_20220420184132: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00015754028105399977, 'time_algorithm_update': 0.0034461149027649783, 'loss': 0.0499583189939739, 'time_step': 0.0036761082394022337, 'init_value': -6.255517482757568, 'ave_value': -6.439365943119486, 'soft_opc': nan} step=2485




2022-04-20 18:41.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.43 [info     ] FQE_20220420184132: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016224149247290383, 'time_algorithm_update': 0.0034188317580961846, 'loss': 0.06029796591946777, 'time_step': 0.0036525034568679164, 'init_value': -6.922847747802734, 'ave_value': -7.202534096796387, 'soft_opc': nan} step=2840




2022-04-20 18:41.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.44 [info     ] FQE_20220420184132: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00016128311694507867, 'time_algorithm_update': 0.003443953017113914, 'loss': 0.0681816506632407, 'time_step': 0.0036811962933607505, 'init_value': -7.519753932952881, 'ave_value': -7.901043832225382, 'soft_opc': nan} step=3195




2022-04-20 18:41.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.46 [info     ] FQE_20220420184132: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00015966925822513204, 'time_algorithm_update': 0.0034742724727576885, 'loss': 0.07955881931464857, 'time_step': 0.0037073021203699245, 'init_value': -7.868044376373291, 'ave_value': -8.428483117286456, 'soft_opc': nan} step=3550




2022-04-20 18:41.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.47 [info     ] FQE_20220420184132: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.0001630702488858935, 'time_algorithm_update': 0.003725419245975118, 'loss': 0.09048354523649937, 'time_step': 0.00396223202557631, 'init_value': -8.524744033813477, 'ave_value': -9.311766683825194, 'soft_opc': nan} step=3905




2022-04-20 18:41.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.49 [info     ] FQE_20220420184132: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00016815225842972876, 'time_algorithm_update': 0.004442748217515543, 'loss': 0.10764866209208546, 'time_step': 0.004687528207268514, 'init_value': -8.699462890625, 'ave_value': -9.771872465398781, 'soft_opc': nan} step=4260




2022-04-20 18:41.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.51 [info     ] FQE_20220420184132: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016589634855028608, 'time_algorithm_update': 0.004387230268666442, 'loss': 0.11578217549643047, 'time_step': 0.00462641178722113, 'init_value': -8.993940353393555, 'ave_value': -10.36508160883242, 'soft_opc': nan} step=4615




2022-04-20 18:41.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.52 [info     ] FQE_20220420184132: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001656968828657983, 'time_algorithm_update': 0.004122963085980483, 'loss': 0.1236617532729263, 'time_step': 0.004359965928843324, 'init_value': -8.946513175964355, 'ave_value': -10.552500895244888, 'soft_opc': nan} step=4970




2022-04-20 18:41.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.54 [info     ] FQE_20220420184132: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016795279274524097, 'time_algorithm_update': 0.004437519127214459, 'loss': 0.1336547426459655, 'time_step': 0.004679291013260962, 'init_value': -9.423428535461426, 'ave_value': -11.417661391630135, 'soft_opc': nan} step=5325




2022-04-20 18:41.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.56 [info     ] FQE_20220420184132: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016778690714231679, 'time_algorithm_update': 0.004043707377474073, 'loss': 0.14250226647816075, 'time_step': 0.004285128687469052, 'init_value': -9.649850845336914, 'ave_value': -11.832683126438539, 'soft_opc': nan} step=5680




2022-04-20 18:41.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.58 [info     ] FQE_20220420184132: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016612066349513095, 'time_algorithm_update': 0.004537685152510522, 'loss': 0.15446434449666824, 'time_step': 0.004777359626662563, 'init_value': -9.75811767578125, 'ave_value': -12.300720929178953, 'soft_opc': nan} step=6035




2022-04-20 18:41.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:41.59 [info     ] FQE_20220420184132: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016570695689026738, 'time_algorithm_update': 0.004342290717111507, 'loss': 0.16278041647353642, 'time_step': 0.004579615257155728, 'init_value': -9.910499572753906, 'ave_value': -12.625110543372548, 'soft_opc': nan} step=6390




2022-04-20 18:41.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.01 [info     ] FQE_20220420184132: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00016116424345634354, 'time_algorithm_update': 0.004068457912391341, 'loss': 0.1768763738723708, 'time_step': 0.004299629909891477, 'init_value': -10.41076946258545, 'ave_value': -13.241740718342008, 'soft_opc': nan} step=6745




2022-04-20 18:42.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.03 [info     ] FQE_20220420184132: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.0001674725975788815, 'time_algorithm_update': 0.004471140848079198, 'loss': 0.18953215332937912, 'time_step': 0.004711548711212588, 'init_value': -10.832725524902344, 'ave_value': -13.70832714844241, 'soft_opc': nan} step=7100




2022-04-20 18:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.05 [info     ] FQE_20220420184132: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00016134624749841825, 'time_algorithm_update': 0.004097016428557919, 'loss': 0.2078641060634818, 'time_step': 0.004332738527109925, 'init_value': -11.255796432495117, 'ave_value': -14.116095601745247, 'soft_opc': nan} step=7455




2022-04-20 18:42.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.06 [info     ] FQE_20220420184132: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016521870250433264, 'time_algorithm_update': 0.004546088903722629, 'loss': 0.22072917792788693, 'time_step': 0.004786974947217484, 'init_value': -11.739341735839844, 'ave_value': -14.579062330538703, 'soft_opc': nan} step=7810




2022-04-20 18:42.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.08 [info     ] FQE_20220420184132: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00016442151136801276, 'time_algorithm_update': 0.004107595497453717, 'loss': 0.23952488250190943, 'time_step': 0.004343868980944996, 'init_value': -12.266891479492188, 'ave_value': -15.094573734441779, 'soft_opc': nan} step=8165




2022-04-20 18:42.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.10 [info     ] FQE_20220420184132: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.0001654927159698916, 'time_algorithm_update': 0.0042879299378730884, 'loss': 0.2650430927131797, 'time_step': 0.004527953644873391, 'init_value': -13.098589897155762, 'ave_value': -15.614074291716518, 'soft_opc': nan} step=8520




2022-04-20 18:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.11 [info     ] FQE_20220420184132: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016544100264428367, 'time_algorithm_update': 0.004352389590840944, 'loss': 0.28199894122884306, 'time_step': 0.004594085585903114, 'init_value': -13.664222717285156, 'ave_value': -16.14521270243204, 'soft_opc': nan} step=8875




2022-04-20 18:42.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.13 [info     ] FQE_20220420184132: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016464179670306998, 'time_algorithm_update': 0.004121243785804426, 'loss': 0.3014816171738883, 'time_step': 0.004358731525045046, 'init_value': -14.28709888458252, 'ave_value': -16.530385310432013, 'soft_opc': nan} step=9230




2022-04-20 18:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.15 [info     ] FQE_20220420184132: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001662704306589046, 'time_algorithm_update': 0.004483703156592141, 'loss': 0.3262398092784512, 'time_step': 0.00472339777879312, 'init_value': -14.466065406799316, 'ave_value': -16.475336089005342, 'soft_opc': nan} step=9585




2022-04-20 18:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.17 [info     ] FQE_20220420184132: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001636820779719823, 'time_algorithm_update': 0.004047763851326956, 'loss': 0.3457115148469596, 'time_step': 0.004286940668670225, 'init_value': -15.311514854431152, 'ave_value': -17.115451964494344, 'soft_opc': nan} step=9940




2022-04-20 18:42.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.18 [info     ] FQE_20220420184132: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016904683180258306, 'time_algorithm_update': 0.004495842356077382, 'loss': 0.3704531498882972, 'time_step': 0.004742325527567259, 'init_value': -15.888577461242676, 'ave_value': -17.664327876831727, 'soft_opc': nan} step=10295




2022-04-20 18:42.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.20 [info     ] FQE_20220420184132: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00016711060429962587, 'time_algorithm_update': 0.004298447219418808, 'loss': 0.39079207870846905, 'time_step': 0.004541855798640721, 'init_value': -16.17180824279785, 'ave_value': -17.91577563888778, 'soft_opc': nan} step=10650




2022-04-20 18:42.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.22 [info     ] FQE_20220420184132: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017116506334761497, 'time_algorithm_update': 0.004244887660926497, 'loss': 0.4144921734404396, 'time_step': 0.004490513197133239, 'init_value': -16.825937271118164, 'ave_value': -18.423215428512528, 'soft_opc': nan} step=11005




2022-04-20 18:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.24 [info     ] FQE_20220420184132: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016520795687823228, 'time_algorithm_update': 0.004461426130482849, 'loss': 0.42936533790239145, 'time_step': 0.004698097202139841, 'init_value': -16.658817291259766, 'ave_value': -18.358854101267205, 'soft_opc': nan} step=11360




2022-04-20 18:42.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.25 [info     ] FQE_20220420184132: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016560755984883912, 'time_algorithm_update': 0.004137293721588564, 'loss': 0.45001736768114736, 'time_step': 0.004378434302101672, 'init_value': -17.2721004486084, 'ave_value': -18.772106298041248, 'soft_opc': nan} step=11715




2022-04-20 18:42.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.27 [info     ] FQE_20220420184132: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00016861566355530646, 'time_algorithm_update': 0.004515153589382977, 'loss': 0.46511043846922023, 'time_step': 0.004760605180767221, 'init_value': -17.238840103149414, 'ave_value': -18.94435059982382, 'soft_opc': nan} step=12070




2022-04-20 18:42.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.29 [info     ] FQE_20220420184132: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016400578995825538, 'time_algorithm_update': 0.004131983367490097, 'loss': 0.478475512438257, 'time_step': 0.004368497284365372, 'init_value': -17.567529678344727, 'ave_value': -19.239979351647534, 'soft_opc': nan} step=12425




2022-04-20 18:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.30 [info     ] FQE_20220420184132: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00016714552758445204, 'time_algorithm_update': 0.004194631039256781, 'loss': 0.49120405581320675, 'time_step': 0.004436688355996575, 'init_value': -17.65117073059082, 'ave_value': -19.531736168542942, 'soft_opc': nan} step=12780




2022-04-20 18:42.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.32 [info     ] FQE_20220420184132: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016620662850393375, 'time_algorithm_update': 0.0044792215589066625, 'loss': 0.5083647543590674, 'time_step': 0.004720265428784867, 'init_value': -17.755535125732422, 'ave_value': -19.76645831130493, 'soft_opc': nan} step=13135




2022-04-20 18:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.34 [info     ] FQE_20220420184132: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016564516954019036, 'time_algorithm_update': 0.00415625102083448, 'loss': 0.5101261997988946, 'time_step': 0.004398606528698558, 'init_value': -17.34023094177246, 'ave_value': -19.535438868634767, 'soft_opc': nan} step=13490




2022-04-20 18:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.36 [info     ] FQE_20220420184132: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017031347247916208, 'time_algorithm_update': 0.004563293994312555, 'loss': 0.5163535338157499, 'time_step': 0.004811143203520439, 'init_value': -17.12704849243164, 'ave_value': -19.4743154492596, 'soft_opc': nan} step=13845




2022-04-20 18:42.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.37 [info     ] FQE_20220420184132: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016877886275170554, 'time_algorithm_update': 0.004164140325197032, 'loss': 0.516919517506596, 'time_step': 0.004408300426644338, 'init_value': -16.830886840820312, 'ave_value': -19.237079689067524, 'soft_opc': nan} step=14200




2022-04-20 18:42.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.39 [info     ] FQE_20220420184132: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.0001641757051709672, 'time_algorithm_update': 0.004376778804080587, 'loss': 0.5196599446394494, 'time_step': 0.004617479485525212, 'init_value': -16.64183235168457, 'ave_value': -19.147523417609097, 'soft_opc': nan} step=14555




2022-04-20 18:42.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.41 [info     ] FQE_20220420184132: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.0001665437725228323, 'time_algorithm_update': 0.004330749914679729, 'loss': 0.5162749177815629, 'time_step': 0.004571707147947499, 'init_value': -16.538990020751953, 'ave_value': -19.15876907464764, 'soft_opc': nan} step=14910




2022-04-20 18:42.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.42 [info     ] FQE_20220420184132: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.0001710132813789475, 'time_algorithm_update': 0.004129479636608715, 'loss': 0.524750091591981, 'time_step': 0.0043766062024613505, 'init_value': -16.373842239379883, 'ave_value': -19.140819217830398, 'soft_opc': nan} step=15265




2022-04-20 18:42.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.44 [info     ] FQE_20220420184132: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00016941486949652014, 'time_algorithm_update': 0.00451359211559027, 'loss': 0.5284027849284696, 'time_step': 0.004757514470060106, 'init_value': -16.569459915161133, 'ave_value': -19.287642732426104, 'soft_opc': nan} step=15620




2022-04-20 18:42.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.46 [info     ] FQE_20220420184132: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016499908877090668, 'time_algorithm_update': 0.004127667655407543, 'loss': 0.5383975007479459, 'time_step': 0.004368698093253122, 'init_value': -16.556522369384766, 'ave_value': -19.26254526258541, 'soft_opc': nan} step=15975




2022-04-20 18:42.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.48 [info     ] FQE_20220420184132: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.000166649213978942, 'time_algorithm_update': 0.004489392965612277, 'loss': 0.5476925624298378, 'time_step': 0.004732896912265831, 'init_value': -16.798303604125977, 'ave_value': -19.44170907644874, 'soft_opc': nan} step=16330




2022-04-20 18:42.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.49 [info     ] FQE_20220420184132: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.0001632233740578235, 'time_algorithm_update': 0.004084875214267784, 'loss': 0.5535091328421529, 'time_step': 0.00432335356591453, 'init_value': -16.534175872802734, 'ave_value': -19.37813831339932, 'soft_opc': nan} step=16685




2022-04-20 18:42.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.51 [info     ] FQE_20220420184132: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016522004570759517, 'time_algorithm_update': 0.004103571260479134, 'loss': 0.5614482196546356, 'time_step': 0.004343067760198889, 'init_value': -16.650707244873047, 'ave_value': -19.417615904250546, 'soft_opc': nan} step=17040




2022-04-20 18:42.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.53 [info     ] FQE_20220420184132: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016341679532762984, 'time_algorithm_update': 0.004508837847642495, 'loss': 0.5564703649146037, 'time_step': 0.004748725891113281, 'init_value': -16.740299224853516, 'ave_value': -19.182639355951753, 'soft_opc': nan} step=17395




2022-04-20 18:42.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 18:42.54 [info     ] FQE_20220420184132: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.0001630554736500055, 'time_algorithm_update': 0.004083936986788898, 'loss': 0.5765244566166485, 'time_step': 0.00432058119438064, 'init_value': -16.97334861755371, 'ave_value': -19.656255343644382, 'soft_opc': nan} step=17750




2022-04-20 18:42.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420184132/model_17750.pt
search iteration:  21
using hyper params:  [0.0001250006601044309, 0.00882612559558456, 8.893177582029885e-05, 5]
2022-04-20 18:42.55 [debug    ] RoundIterator is selected.
2022-04-20 18:42.55 [info     ] Directory is created at d3rlpy_logs/CQL_20220420184255
2022-04-20 18:42.55 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:42.55 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:42.55 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420184255/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0001250006601044309, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.04 [info     ] CQL_20220420184255: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00036233070998163946, 'time_algorithm_update': 0.025829028664973743, 'temp_loss': 4.650757991779617, 'temp': 0.9850310134260278, 'alpha_loss': -14.200611767015959, 'alpha': 1.0157766307306568, 'critic_loss': 32.53707601591857, 'actor_loss': 4.066817372732344, 'time_step': 0.026289398210090503, 'td_error': 10.380683920044174, 'init_value': -12.131223678588867, 'ave_value': -7.19189156488163} step=342
2022-04-20 18:43.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.13 [info     ] CQL_20220420184255: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000364344022427386, 'time_algorithm_update': 0.02582759954775983, 'temp_loss': 4.219484005058021, 'temp': 0.9564254329218502, 'alpha_loss': -5.011387642184327, 'alpha': 1.0370332075838458, 'critic_loss': 26.3887491449278, 'actor_loss': 11.544607916770623, 'time_step': 0.02629200129481087, 'td_error': 36.50516661797722, 'init_value': -25.8264102935791, 'ave_value': -15.092183057609946} step=684
2022-04-20 18:43.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.23 [info     ] CQL_20220420184255: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00036301808050501417, 'time_algorithm_update': 0.025885998854163098, 'temp_loss': 3.5445366093986914, 'temp': 0.9308644862551438, 'alpha_loss': 3.15832040761189, 'alpha': 1.0400122187988103, 'critic_loss': 71.71885907580281, 'actor_loss': 22.6300915249607, 'time_step': 0.02635189176302904, 'td_error': 73.16682050248087, 'init_value': -42.706520080566406, 'ave_value': -24.70243521301999} step=1026
2022-04-20 18:43.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.32 [info     ] CQL_20220420184255: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003587467628612853, 'time_algorithm_update': 0.02550573864875481, 'temp_loss': 2.5817504451985944, 'temp': 0.9091621930139107, 'alpha_loss': 13.574447100622612, 'alpha': 1.0080906299122594, 'critic_loss': 187.24702765927677, 'actor_loss': 36.89398665177195, 'time_step': 0.02596418062845866, 'td_error': 291.7779409532198, 'init_value': -65.95423889160156, 'ave_value': -37.90082283773535} step=1368
2022-04-20 18:43.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.41 [info     ] CQL_20220420184255: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003609936139736956, 'time_algorithm_update': 0.025687882774754575, 'temp_loss': 1.6847373710738287, 'temp': 0.8930047980526037, 'alpha_loss': 19.388368847774483, 'alpha': 0.9546899293598375, 'critic_loss': 406.02433982090645, 'actor_loss': 53.90631511755157, 'time_step': 0.026150702035915085, 'td_error': 151.14302968594095, 'init_value': -88.72917175292969, 'ave_value': -51.28147412231108} step=1710
2022-04-20 18:43.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:43.51 [info     ] CQL_20220420184255: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00036109400074384364, 'time_algorithm_update': 0.025728189457229704, 'temp_loss': 1.1744512965281804, 'temp': 0.8800089449910392, 'alpha_loss': 19.37015135804115, 'alpha': 0.9152506391207377, 'critic_loss': 636.2082263433446, 'actor_loss': 68.40473824216609, 'time_step': 0.026191178818195188, 'td_error': 264.8539588789854, 'init_value': -113.8466796875, 'ave_value': -66.09059804595537} step=2052
2022-04-20 18:43.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.00 [info     ] CQL_20220420184255: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003692169635616548, 'time_algorithm_update': 0.026028205079641955, 'temp_loss': 0.5721180464266337, 'temp': 0.8709939235483694, 'alpha_loss': 23.74125154952557, 'alpha': 0.8761894945164173, 'critic_loss': 866.4525490922538, 'actor_loss': 83.60778186195775, 'time_step': 0.0264993759623745, 'td_error': 489.1839836781363, 'init_value': -141.19110107421875, 'ave_value': -82.12951489722407} step=2394
2022-04-20 18:44.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.10 [info     ] CQL_20220420184255: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003637479759796321, 'time_algorithm_update': 0.0261367322408665, 'temp_loss': 0.061041378225499425, 'temp': 0.8672033952690704, 'alpha_loss': 28.399480599408957, 'alpha': 0.8369953928635134, 'critic_loss': 1122.6687245508383, 'actor_loss': 99.69161304674651, 'time_step': 0.026602178289179216, 'td_error': 727.1256908365287, 'init_value': -164.8941192626953, 'ave_value': -92.96187585990708} step=2736
2022-04-20 18:44.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.19 [info     ] CQL_20220420184255: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003644590489348473, 'time_algorithm_update': 0.026104209018729584, 'temp_loss': -0.32933684173775346, 'temp': 0.8693697147550639, 'alpha_loss': 32.98720977738587, 'alpha': 0.7993863767002061, 'critic_loss': 1409.053022418106, 'actor_loss': 116.97998696042781, 'time_step': 0.0265693559981229, 'td_error': 1104.5896889415503, 'init_value': -196.71145629882812, 'ave_value': -111.82181019566886} step=3078
2022-04-20 18:44.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.29 [info     ] CQL_20220420184255: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003702912414283083, 'time_algorithm_update': 0.02774588476147568, 'temp_loss': -0.7020261361214676, 'temp': 0.878671310624184, 'alpha_loss': 38.39230976327818, 'alpha': 0.7637412110615892, 'critic_loss': 1728.8743339672424, 'actor_loss': 135.16178340800323, 'time_step': 0.02821950884590372, 'td_error': 2193.9807760410154, 'init_value': -235.052001953125, 'ave_value': -128.61696678244851} step=3420
2022-04-20 18:44.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.39 [info     ] CQL_20220420184255: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00036348306644729705, 'time_algorithm_update': 0.025959192660816928, 'temp_loss': -0.8491721969773198, 'temp': 0.8945503963364495, 'alpha_loss': 43.43357088970162, 'alpha': 0.7306688919402006, 'critic_loss': 2075.6336809124864, 'actor_loss': 153.75997940420407, 'time_step': 0.02642496705752367, 'td_error': 3231.1040246505063, 'init_value': -272.9256896972656, 'ave_value': -143.89851062011076} step=3762
2022-04-20 18:44.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.48 [info     ] CQL_20220420184255: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00036943028544821933, 'time_algorithm_update': 0.025238824169538175, 'temp_loss': -1.0606670833535885, 'temp': 0.9158056726929737, 'alpha_loss': 50.692477973581056, 'alpha': 0.6985713687905094, 'critic_loss': 2437.5712797823007, 'actor_loss': 173.4294045431572, 'time_step': 0.025707190496879712, 'td_error': 4971.795521344219, 'init_value': -320.81793212890625, 'ave_value': -164.22337436640316} step=4104
2022-04-20 18:44.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:44.57 [info     ] CQL_20220420184255: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003627740848831266, 'time_algorithm_update': 0.024885394419842992, 'temp_loss': -1.259095558471847, 'temp': 0.9428290174021359, 'alpha_loss': 58.5193188873648, 'alpha': 0.6681954336096669, 'critic_loss': 2832.3260790730083, 'actor_loss': 194.31720836260166, 'time_step': 0.025349161778277125, 'td_error': 7070.150645099803, 'init_value': -400.3316650390625, 'ave_value': -188.4017614154746} step=4446
2022-04-20 18:44.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.06 [info     ] CQL_20220420184255: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003628242782682006, 'time_algorithm_update': 0.025332250093158922, 'temp_loss': -1.2206554876869185, 'temp': 0.9727546689454575, 'alpha_loss': 58.771082666185166, 'alpha': 0.6401542803348853, 'critic_loss': 3245.8880265442253, 'actor_loss': 213.52908989978812, 'time_step': 0.025798514572500486, 'td_error': 9523.142212552355, 'init_value': -444.20867919921875, 'ave_value': -206.75116722516648} step=4788
2022-04-20 18:45.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.15 [info     ] CQL_20220420184255: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003751390858700401, 'time_algorithm_update': 0.024312229881509703, 'temp_loss': -1.1898196020670104, 'temp': 1.0024254836185633, 'alpha_loss': 68.79449468467668, 'alpha': 0.6137969873802006, 'critic_loss': 3648.7191526178726, 'actor_loss': 233.73923537047983, 'time_step': 0.02478401075329697, 'td_error': 15788.465993696873, 'init_value': -504.84161376953125, 'ave_value': -231.672596886652} step=5130
2022-04-20 18:45.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.24 [info     ] CQL_20220420184255: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003621062340095029, 'time_algorithm_update': 0.02426552633095903, 'temp_loss': -1.1507848374632716, 'temp': 1.0312530712077492, 'alpha_loss': 74.0118951239781, 'alpha': 0.588637479041752, 'critic_loss': 4065.2320977818895, 'actor_loss': 253.84755617554424, 'time_step': 0.02472592794407181, 'td_error': 16748.707379301468, 'init_value': -539.18408203125, 'ave_value': -247.12176124200897} step=5472
2022-04-20 18:45.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.33 [info     ] CQL_20220420184255: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00036207416601348344, 'time_algorithm_update': 0.024783319897121854, 'temp_loss': -1.053899079931584, 'temp': 1.0591678852923432, 'alpha_loss': 68.05538302973697, 'alpha': 0.56672031889882, 'critic_loss': 4428.700681452166, 'actor_loss': 267.5524906359221, 'time_step': 0.0252458651860555, 'td_error': 16389.996868502734, 'init_value': -556.2882080078125, 'ave_value': -253.16079013709012} step=5814
2022-04-20 18:45.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.42 [info     ] CQL_20220420184255: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003601654231199744, 'time_algorithm_update': 0.025007845365513138, 'temp_loss': -1.120194647265108, 'temp': 1.0876700243754693, 'alpha_loss': 69.45506209100199, 'alpha': 0.5471653554871766, 'critic_loss': 4710.459521198831, 'actor_loss': 280.50001356336804, 'time_step': 0.025468249070016963, 'td_error': 18358.630882566646, 'init_value': -598.6219482421875, 'ave_value': -274.6009584016843} step=6156
2022-04-20 18:45.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.51 [info     ] CQL_20220420184255: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035893917083740234, 'time_algorithm_update': 0.02487284119366205, 'temp_loss': -1.2075902752534688, 'temp': 1.1210320668610914, 'alpha_loss': 68.20363493690714, 'alpha': 0.528669617155142, 'critic_loss': 4983.267105548703, 'actor_loss': 293.40488915694385, 'time_step': 0.025331440027694257, 'td_error': 12737.811112818592, 'init_value': -608.0685424804688, 'ave_value': -277.26296450433966} step=6498
2022-04-20 18:45.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:45.59 [info     ] CQL_20220420184255: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003256267971462674, 'time_algorithm_update': 0.02202459176381429, 'temp_loss': -1.2264678374542828, 'temp': 1.1539259082392643, 'alpha_loss': 73.50158878794888, 'alpha': 0.5109214836742446, 'critic_loss': 5246.33696688825, 'actor_loss': 304.9182116413674, 'time_step': 0.022442270440664904, 'td_error': 27412.990119343023, 'init_value': -648.0702514648438, 'ave_value': -292.43608708326076} step=6840
2022-04-20 18:45.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.08 [info     ] CQL_20220420184255: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00034479230468036136, 'time_algorithm_update': 0.02354851521943745, 'temp_loss': -1.288050475263456, 'temp': 1.190757349917763, 'alpha_loss': 83.03263356671695, 'alpha': 0.49199828601371476, 'critic_loss': 5536.438966985334, 'actor_loss': 319.43550092016744, 'time_step': 0.023989802912661905, 'td_error': 22800.520489606595, 'init_value': -666.3154296875, 'ave_value': -305.45035674289807} step=7182
2022-04-20 18:46.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.17 [info     ] CQL_20220420184255: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00036147602817468476, 'time_algorithm_update': 0.024703877014026307, 'temp_loss': -1.2813651211864767, 'temp': 1.2266515730417262, 'alpha_loss': 84.32521329288595, 'alpha': 0.4744537708006407, 'critic_loss': 5840.44987121939, 'actor_loss': 333.5990342703479, 'time_step': 0.025161541693391857, 'td_error': 43459.362144440776, 'init_value': -712.3744506835938, 'ave_value': -331.4455492968354} step=7524
2022-04-20 18:46.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.26 [info     ] CQL_20220420184255: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003570687701130471, 'time_algorithm_update': 0.024478589582164385, 'temp_loss': -1.2746325819725879, 'temp': 1.2639788058069017, 'alpha_loss': 98.07759598681801, 'alpha': 0.4564649252689373, 'critic_loss': 6147.982230560125, 'actor_loss': 348.0400979560718, 'time_step': 0.02493453862374289, 'td_error': 70741.48870029853, 'init_value': -729.974609375, 'ave_value': -344.89790305624945} step=7866
2022-04-20 18:46.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.35 [info     ] CQL_20220420184255: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003694769931815521, 'time_algorithm_update': 0.024746594373245685, 'temp_loss': -1.3407924575240988, 'temp': 1.3022488979568259, 'alpha_loss': 108.30797735850017, 'alpha': 0.4384713518863533, 'critic_loss': 6507.703000788103, 'actor_loss': 365.36231932166027, 'time_step': 0.02521485891955638, 'td_error': 79409.25638983514, 'init_value': -756.945068359375, 'ave_value': -355.19281733045773} step=8208
2022-04-20 18:46.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.44 [info     ] CQL_20220420184255: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00036161057433189704, 'time_algorithm_update': 0.024570170898883664, 'temp_loss': -1.285551596890416, 'temp': 1.3414116404209917, 'alpha_loss': 105.081173043502, 'alpha': 0.4220522994186446, 'critic_loss': 6864.245802494517, 'actor_loss': 380.10691958421853, 'time_step': 0.02502819460037856, 'td_error': 81903.43988619091, 'init_value': -797.1913452148438, 'ave_value': -380.9045615329627} step=8550
2022-04-20 18:46.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:46.53 [info     ] CQL_20220420184255: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00036106820692095843, 'time_algorithm_update': 0.024884648490370365, 'temp_loss': -1.2619019608661446, 'temp': 1.3802875855512786, 'alpha_loss': 113.83845832333927, 'alpha': 0.40639306082014454, 'critic_loss': 7242.893953022204, 'actor_loss': 396.89444647894965, 'time_step': 0.02534719517356471, 'td_error': 80898.80094352488, 'init_value': -819.7874755859375, 'ave_value': -392.3376054951273} step=8892
2022-04-20 18:46.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.02 [info     ] CQL_20220420184255: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00036699381488108495, 'time_algorithm_update': 0.02472782413861905, 'temp_loss': -1.237235825508833, 'temp': 1.4206210399231716, 'alpha_loss': 127.19743999681975, 'alpha': 0.3911082655714269, 'critic_loss': 7626.773718761422, 'actor_loss': 415.30702914410864, 'time_step': 0.025195494032742686, 'td_error': 69592.47152622206, 'init_value': -854.46875, 'ave_value': -416.1106007278583} step=9234
2022-04-20 18:47.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.11 [info     ] CQL_20220420184255: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003717426668133652, 'time_algorithm_update': 0.02627134462546187, 'temp_loss': -1.3244364057280864, 'temp': 1.4605826543785676, 'alpha_loss': 132.24620112480477, 'alpha': 0.37663351016783575, 'critic_loss': 8049.555378517909, 'actor_loss': 434.14527161358393, 'time_step': 0.02674622284738641, 'td_error': 85005.9093372515, 'init_value': -861.9228515625, 'ave_value': -426.74044353501284} step=9576
2022-04-20 18:47.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.21 [info     ] CQL_20220420184255: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00037274026034171123, 'time_algorithm_update': 0.02611312322449266, 'temp_loss': -1.1689205923014216, 'temp': 1.499981121013039, 'alpha_loss': 131.2215944145158, 'alpha': 0.3629582386616378, 'critic_loss': 8479.99623937774, 'actor_loss': 453.00550458584615, 'time_step': 0.02658789478547392, 'td_error': 207483.78358862773, 'init_value': -949.66162109375, 'ave_value': -474.7251640406244} step=9918
2022-04-20 18:47.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.30 [info     ] CQL_20220420184255: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00036553402393184906, 'time_algorithm_update': 0.02599144260785733, 'temp_loss': -1.2031818520909512, 'temp': 1.5403210182636105, 'alpha_loss': 139.21224734099985, 'alpha': 0.3501961530126326, 'critic_loss': 8919.162904616685, 'actor_loss': 473.6830280147798, 'time_step': 0.026459236591182954, 'td_error': 219023.9118964923, 'init_value': -999.3547973632812, 'ave_value': -512.1318352025787} step=10260
2022-04-20 18:47.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.40 [info     ] CQL_20220420184255: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003686780818024574, 'time_algorithm_update': 0.026082667690968654, 'temp_loss': -1.1284989917226005, 'temp': 1.583434169752556, 'alpha_loss': 146.97037474872076, 'alpha': 0.3371123626218205, 'critic_loss': 9434.824979726334, 'actor_loss': 496.81380136947183, 'time_step': 0.02655573476824844, 'td_error': 187464.5143593347, 'init_value': -1007.5115966796875, 'ave_value': -513.7518428209531} step=10602
2022-04-20 18:47.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.49 [info     ] CQL_20220420184255: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003697893075775682, 'time_algorithm_update': 0.026172821284734714, 'temp_loss': -1.0539276232801333, 'temp': 1.621002451718202, 'alpha_loss': 147.13642423752455, 'alpha': 0.3253507864231255, 'critic_loss': 9938.669125205592, 'actor_loss': 518.8517225834362, 'time_step': 0.02664471439450805, 'td_error': 252742.76146503878, 'init_value': -1054.489013671875, 'ave_value': -534.1449679127537} step=10944
2022-04-20 18:47.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:47.59 [info     ] CQL_20220420184255: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.000370953515259146, 'time_algorithm_update': 0.026042511588648745, 'temp_loss': -1.0157936370730052, 'temp': 1.6619906188451756, 'alpha_loss': 149.27061820448492, 'alpha': 0.31422433553383367, 'critic_loss': 10439.554651806926, 'actor_loss': 541.4011226899443, 'time_step': 0.026514351019385266, 'td_error': 284093.60381262103, 'init_value': -1193.46337890625, 'ave_value': -606.3886977040647} step=11286
2022-04-20 18:47.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.08 [info     ] CQL_20220420184255: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000369947556166621, 'time_algorithm_update': 0.026258760028415255, 'temp_loss': -1.0579943300949202, 'temp': 1.702155411940569, 'alpha_loss': 149.66299814369248, 'alpha': 0.303672590799499, 'critic_loss': 10930.516381693165, 'actor_loss': 562.3373255144086, 'time_step': 0.0267286579511319, 'td_error': 310624.4939766513, 'init_value': -1203.7958984375, 'ave_value': -615.6505594621518} step=11628
2022-04-20 18:48.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.18 [info     ] CQL_20220420184255: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00037277581398947196, 'time_algorithm_update': 0.026140942211039582, 'temp_loss': -0.8343443589731615, 'temp': 1.7418011587265638, 'alpha_loss': 145.00472961113468, 'alpha': 0.293555868647949, 'critic_loss': 11420.5143686038, 'actor_loss': 581.9761312384354, 'time_step': 0.026616747616327298, 'td_error': 308434.6804689645, 'init_value': -1195.81689453125, 'ave_value': -608.7619852437408} step=11970
2022-04-20 18:48.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.28 [info     ] CQL_20220420184255: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003669303760193942, 'time_algorithm_update': 0.026359583899291637, 'temp_loss': -0.7031513966881392, 'temp': 1.778564056806397, 'alpha_loss': 136.19416500671565, 'alpha': 0.28387634159877284, 'critic_loss': 11877.468727156433, 'actor_loss': 599.2426289340906, 'time_step': 0.02682821513616551, 'td_error': 330450.5474136796, 'init_value': -1244.244140625, 'ave_value': -635.364758260883} step=12312
2022-04-20 18:48.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.37 [info     ] CQL_20220420184255: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003664723613805938, 'time_algorithm_update': 0.026179236975329662, 'temp_loss': -0.5212122932599302, 'temp': 1.8038340862731488, 'alpha_loss': 118.80197690662585, 'alpha': 0.27635517908118623, 'critic_loss': 12142.72071311906, 'actor_loss': 607.2053912424901, 'time_step': 0.026649321031849287, 'td_error': 333647.6027756125, 'init_value': -1275.538818359375, 'ave_value': -661.8014059187935} step=12654
2022-04-20 18:48.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.47 [info     ] CQL_20220420184255: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00036556330340647557, 'time_algorithm_update': 0.02648784751780549, 'temp_loss': -0.49961833270233974, 'temp': 1.8323021911041082, 'alpha_loss': 133.483483292206, 'alpha': 0.2681117123156263, 'critic_loss': 12449.697673953764, 'actor_loss': 623.1255219219721, 'time_step': 0.026956373487996777, 'td_error': 201225.86585958433, 'init_value': -1307.9859619140625, 'ave_value': -665.9200389171365} step=12996
2022-04-20 18:48.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:48.56 [info     ] CQL_20220420184255: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00037011974736263876, 'time_algorithm_update': 0.025967433438663593, 'temp_loss': -0.38284036802530985, 'temp': 1.8566925511722676, 'alpha_loss': 124.47687132874428, 'alpha': 0.2600426219883021, 'critic_loss': 12791.222173679642, 'actor_loss': 636.2957350524546, 'time_step': 0.026438130272759333, 'td_error': 342503.2949450515, 'init_value': -1367.2484130859375, 'ave_value': -690.7657564041186} step=13338
2022-04-20 18:48.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.06 [info     ] CQL_20220420184255: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003610249848393669, 'time_algorithm_update': 0.025867374319779247, 'temp_loss': -0.2715686742782767, 'temp': 1.8755386541461387, 'alpha_loss': 130.5851218630696, 'alpha': 0.2520752795345602, 'critic_loss': 13103.568285133406, 'actor_loss': 650.9001066865977, 'time_step': 0.026329834558810408, 'td_error': 358913.7424483352, 'init_value': -1466.0438232421875, 'ave_value': -750.8421196294985} step=13680
2022-04-20 18:49.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.15 [info     ] CQL_20220420184255: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00036815662830196624, 'time_algorithm_update': 0.0260326095491822, 'temp_loss': -0.21587558524214734, 'temp': 1.8900975065621717, 'alpha_loss': 112.13943398347375, 'alpha': 0.24471123390204724, 'critic_loss': 13435.396021792763, 'actor_loss': 661.6083923696774, 'time_step': 0.026504701341104785, 'td_error': 239392.73730895936, 'init_value': -1446.72119140625, 'ave_value': -717.8520909911441} step=14022
2022-04-20 18:49.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.25 [info     ] CQL_20220420184255: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00036572922042935914, 'time_algorithm_update': 0.025662628530758864, 'temp_loss': 0.036984491221911726, 'temp': 1.894555526170117, 'alpha_loss': 90.34888015643895, 'alpha': 0.23849530295844665, 'critic_loss': 13656.243144074379, 'actor_loss': 667.8704601979396, 'time_step': 0.026129460474203902, 'td_error': 357229.97862879816, 'init_value': -1447.28564453125, 'ave_value': -729.4559596347218} step=14364
2022-04-20 18:49.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.34 [info     ] CQL_20220420184255: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003639884859497784, 'time_algorithm_update': 0.026045309172736272, 'temp_loss': 0.20811261164776065, 'temp': 1.890351847249862, 'alpha_loss': 102.60421420537938, 'alpha': 0.23213721650559999, 'critic_loss': 13846.096476950841, 'actor_loss': 676.8870619389049, 'time_step': 0.026510309754756458, 'td_error': 274275.29816987383, 'init_value': -1537.6397705078125, 'ave_value': -753.745162332064} step=14706
2022-04-20 18:49.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.44 [info     ] CQL_20220420184255: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00036662154727511935, 'time_algorithm_update': 0.025769757945635165, 'temp_loss': 0.17007334686118608, 'temp': 1.8739304263689365, 'alpha_loss': 86.16200735136779, 'alpha': 0.22586722092496025, 'critic_loss': 13996.892495317068, 'actor_loss': 680.4093036316989, 'time_step': 0.026238951766700075, 'td_error': 205925.75317169048, 'init_value': -1562.32421875, 'ave_value': -753.4872490116692} step=15048
2022-04-20 18:49.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:49.53 [info     ] CQL_20220420184255: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00036716740033779926, 'time_algorithm_update': 0.026207963625590008, 'temp_loss': 0.16688776543440179, 'temp': 1.8622512196936802, 'alpha_loss': 79.49945154803538, 'alpha': 0.22026924000323167, 'critic_loss': 14138.511441771747, 'actor_loss': 686.1493490118729, 'time_step': 0.026677108647530538, 'td_error': 265685.6136819733, 'init_value': -1561.9727783203125, 'ave_value': -748.2813673997912} step=15390
2022-04-20 18:49.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.03 [info     ] CQL_20220420184255: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037026544760542304, 'time_algorithm_update': 0.02586607835446185, 'temp_loss': 0.3151977258402179, 'temp': 1.844851557274311, 'alpha_loss': 77.36361179296036, 'alpha': 0.21469760259166795, 'critic_loss': 14264.697999474598, 'actor_loss': 690.9284339592471, 'time_step': 0.026338465729652094, 'td_error': 154970.3617171699, 'init_value': -1634.463134765625, 'ave_value': -772.7909235434852} step=15732
2022-04-20 18:50.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.11 [info     ] CQL_20220420184255: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003678924158999794, 'time_algorithm_update': 0.024639338080646, 'temp_loss': 0.3765559291674031, 'temp': 1.816519656376532, 'alpha_loss': 59.876135954382825, 'alpha': 0.20983588455887567, 'critic_loss': 14307.61464044225, 'actor_loss': 687.6690806784825, 'time_step': 0.025109586659927813, 'td_error': 160679.649222632, 'init_value': -1654.156005859375, 'ave_value': -772.6917421655162} step=16074
2022-04-20 18:50.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.20 [info     ] CQL_20220420184255: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00036249662700452303, 'time_algorithm_update': 0.024475011212086818, 'temp_loss': 0.46286319211473936, 'temp': 1.7853892393279494, 'alpha_loss': 43.27557605992981, 'alpha': 0.20562329618205802, 'critic_loss': 14255.089298359831, 'actor_loss': 680.8638822321307, 'time_step': 0.024938241780152796, 'td_error': 82907.1217490545, 'init_value': -1655.6849365234375, 'ave_value': -762.5582865633695} step=16416
2022-04-20 18:50.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.29 [info     ] CQL_20220420184255: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00036915213043926755, 'time_algorithm_update': 0.024446491609539902, 'temp_loss': 0.398288794602567, 'temp': 1.7529935390628568, 'alpha_loss': 45.103046093767844, 'alpha': 0.20182784145687058, 'critic_loss': 14166.736293859649, 'actor_loss': 677.3930393687466, 'time_step': 0.02491674994864659, 'td_error': 83192.81865035678, 'init_value': -1677.1243896484375, 'ave_value': -759.2335644081355} step=16758
2022-04-20 18:50.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:50.38 [info     ] CQL_20220420184255: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00036070081922743056, 'time_algorithm_update': 0.024371909119232357, 'temp_loss': 0.5146221917405812, 'temp': 1.7208694690152218, 'alpha_loss': 31.95730457040999, 'alpha': 0.19821943765320973, 'critic_loss': 14062.973461485746, 'actor_loss': 670.5210558796487, 'time_step': 0.0248315083353143, 'td_error': 50319.29033997352, 'init_value': -1699.118896484375, 'ave_value': -751.5194657116122} step=17100
2022-04-20 18:50.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420184255/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:50.39 [info     ] FQE_20220420185038: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015194875648222775, 'time_algorithm_update': 0.0034343219665159664, 'loss': 0.007217510160148503, 'time_step': 0.003660247986575207, 'init_value': -0.20327457785606384, 'ave_value': -0.14383569837522667, 'soft_opc': nan} step=166




2022-04-20 18:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.40 [info     ] FQE_20220420185038: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015527799905064595, 'time_algorithm_update': 0.003383696797382401, 'loss': 0.005712890174206212, 'time_step': 0.0036123947924878224, 'init_value': -0.3436667025089264, 'ave_value': -0.23277030488190886, 'soft_opc': nan} step=332




2022-04-20 18:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.40 [info     ] FQE_20220420185038: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015489451856498258, 'time_algorithm_update': 0.0034265891615166723, 'loss': 0.005150713457120021, 'time_step': 0.0036482135933565804, 'init_value': -0.429338276386261, 'ave_value': -0.286266186356813, 'soft_opc': nan} step=498




2022-04-20 18:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.41 [info     ] FQE_20220420185038: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015463455613837186, 'time_algorithm_update': 0.00345320443072951, 'loss': 0.0050130558567660225, 'time_step': 0.0036800180573061288, 'init_value': -0.538535475730896, 'ave_value': -0.35813441487733977, 'soft_opc': nan} step=664




2022-04-20 18:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.42 [info     ] FQE_20220420185038: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015192721263471856, 'time_algorithm_update': 0.0033871940819613904, 'loss': 0.004616305829553181, 'time_step': 0.0036088314401098043, 'init_value': -0.6438854336738586, 'ave_value': -0.42536916625224525, 'soft_opc': nan} step=830




2022-04-20 18:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.42 [info     ] FQE_20220420185038: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001557835613388613, 'time_algorithm_update': 0.0034358386533806123, 'loss': 0.004346182070537863, 'time_step': 0.003665213125297822, 'init_value': -0.6733518242835999, 'ave_value': -0.42544167284509754, 'soft_opc': nan} step=996




2022-04-20 18:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.43 [info     ] FQE_20220420185038: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001522360077823501, 'time_algorithm_update': 0.003516432750655944, 'loss': 0.004361348633418779, 'time_step': 0.003736934029912374, 'init_value': -0.6948091983795166, 'ave_value': -0.41606636488692716, 'soft_opc': nan} step=1162




2022-04-20 18:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.44 [info     ] FQE_20220420185038: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015848515981651214, 'time_algorithm_update': 0.003481790243861187, 'loss': 0.004078399918760252, 'time_step': 0.003714449434395296, 'init_value': -0.7782660126686096, 'ave_value': -0.45197626251458856, 'soft_opc': nan} step=1328




2022-04-20 18:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.44 [info     ] FQE_20220420185038: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015787762331675333, 'time_algorithm_update': 0.0034522320850785956, 'loss': 0.004006311729065064, 'time_step': 0.0036803182349147566, 'init_value': -0.8383650779724121, 'ave_value': -0.49681077172459515, 'soft_opc': nan} step=1494




2022-04-20 18:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.45 [info     ] FQE_20220420185038: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015608230269098855, 'time_algorithm_update': 0.003538943198790033, 'loss': 0.003894006628655346, 'time_step': 0.0037683521408632577, 'init_value': -0.9182894229888916, 'ave_value': -0.5348473674005216, 'soft_opc': nan} step=1660




2022-04-20 18:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.46 [info     ] FQE_20220420185038: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015921334186232234, 'time_algorithm_update': 0.0034540834197078845, 'loss': 0.003915684586615268, 'time_step': 0.0036872194474001966, 'init_value': -0.9408385753631592, 'ave_value': -0.5243803396636436, 'soft_opc': nan} step=1826




2022-04-20 18:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.47 [info     ] FQE_20220420185038: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015352863863290074, 'time_algorithm_update': 0.003428473530045475, 'loss': 0.003917263597754918, 'time_step': 0.0036520598882652192, 'init_value': -1.0047789812088013, 'ave_value': -0.561595058981497, 'soft_opc': nan} step=1992




2022-04-20 18:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.47 [info     ] FQE_20220420185038: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001578589519822454, 'time_algorithm_update': 0.0034181525908320784, 'loss': 0.004147643919091627, 'time_step': 0.003649724535195224, 'init_value': -1.1218522787094116, 'ave_value': -0.6424619552083649, 'soft_opc': nan} step=2158




2022-04-20 18:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.48 [info     ] FQE_20220420185038: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001571178436279297, 'time_algorithm_update': 0.0035022755703294135, 'loss': 0.004086098852511271, 'time_step': 0.0037310482507728667, 'init_value': -1.2374686002731323, 'ave_value': -0.7288706965635422, 'soft_opc': nan} step=2324




2022-04-20 18:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.49 [info     ] FQE_20220420185038: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015180656708866717, 'time_algorithm_update': 0.0035047057163284487, 'loss': 0.004190725275786616, 'time_step': 0.003727484898394849, 'init_value': -1.2656084299087524, 'ave_value': -0.7387382398871285, 'soft_opc': nan} step=2490




2022-04-20 18:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.49 [info     ] FQE_20220420185038: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015984960349209337, 'time_algorithm_update': 0.0026800876640411743, 'loss': 0.004545148142265627, 'time_step': 0.0029150118310767486, 'init_value': -1.3947707414627075, 'ave_value': -0.821667139871499, 'soft_opc': nan} step=2656




2022-04-20 18:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.50 [info     ] FQE_20220420185038: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001563307750655944, 'time_algorithm_update': 0.0035143099635480397, 'loss': 0.004930226445052189, 'time_step': 0.0037401627345257497, 'init_value': -1.5016214847564697, 'ave_value': -0.8944735249227568, 'soft_opc': nan} step=2822




2022-04-20 18:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.50 [info     ] FQE_20220420185038: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001533778317003365, 'time_algorithm_update': 0.0033494650599468186, 'loss': 0.004948165593107781, 'time_step': 0.0035735742155327856, 'init_value': -1.5392060279846191, 'ave_value': -0.9163137304823141, 'soft_opc': nan} step=2988




2022-04-20 18:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.51 [info     ] FQE_20220420185038: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015899503087422935, 'time_algorithm_update': 0.0036393676895693124, 'loss': 0.0055825418571055115, 'time_step': 0.0038737345890826488, 'init_value': -1.6276198625564575, 'ave_value': -0.9766561182039614, 'soft_opc': nan} step=3154




2022-04-20 18:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.52 [info     ] FQE_20220420185038: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015731748328151474, 'time_algorithm_update': 0.00350257287542504, 'loss': 0.005676341316023146, 'time_step': 0.003729893500546375, 'init_value': -1.7489869594573975, 'ave_value': -1.0438640507340835, 'soft_opc': nan} step=3320




2022-04-20 18:50.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.53 [info     ] FQE_20220420185038: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015565286199730564, 'time_algorithm_update': 0.0034734384123101293, 'loss': 0.006144612738638219, 'time_step': 0.003700226186269737, 'init_value': -1.8028490543365479, 'ave_value': -1.0802035001601586, 'soft_opc': nan} step=3486




2022-04-20 18:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.53 [info     ] FQE_20220420185038: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001557950513908662, 'time_algorithm_update': 0.0033733184079089797, 'loss': 0.006820065721852638, 'time_step': 0.0036004264670682243, 'init_value': -1.9557255506515503, 'ave_value': -1.2152301961170124, 'soft_opc': nan} step=3652




2022-04-20 18:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.54 [info     ] FQE_20220420185038: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015497638518551746, 'time_algorithm_update': 0.0035327917121979126, 'loss': 0.007231040490953903, 'time_step': 0.003762164747858622, 'init_value': -2.037557601928711, 'ave_value': -1.257987689659805, 'soft_opc': nan} step=3818




2022-04-20 18:50.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.55 [info     ] FQE_20220420185038: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015354012868490564, 'time_algorithm_update': 0.003477201404341732, 'loss': 0.0077686194121467335, 'time_step': 0.0036991805915372916, 'init_value': -2.140528678894043, 'ave_value': -1.3391413825874527, 'soft_opc': nan} step=3984




2022-04-20 18:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.55 [info     ] FQE_20220420185038: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015811891440885612, 'time_algorithm_update': 0.0035419205585158014, 'loss': 0.008437724957938295, 'time_step': 0.003774303987801793, 'init_value': -2.2125067710876465, 'ave_value': -1.386490999299798, 'soft_opc': nan} step=4150




2022-04-20 18:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.56 [info     ] FQE_20220420185038: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015652036092367517, 'time_algorithm_update': 0.0035801450890230843, 'loss': 0.008703615415437394, 'time_step': 0.003810786339173834, 'init_value': -2.3327560424804688, 'ave_value': -1.467469321977901, 'soft_opc': nan} step=4316




2022-04-20 18:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.57 [info     ] FQE_20220420185038: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015951926449695266, 'time_algorithm_update': 0.003524936825396067, 'loss': 0.009761590797437287, 'time_step': 0.003757091889898461, 'init_value': -2.341949939727783, 'ave_value': -1.4593745319260838, 'soft_opc': nan} step=4482




2022-04-20 18:50.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.57 [info     ] FQE_20220420185038: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015700007059487952, 'time_algorithm_update': 0.0035670291946594975, 'loss': 0.010421454929606428, 'time_step': 0.0037969393902514354, 'init_value': -2.4551172256469727, 'ave_value': -1.5171737742966864, 'soft_opc': nan} step=4648




2022-04-20 18:50.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.58 [info     ] FQE_20220420185038: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015560977430228727, 'time_algorithm_update': 0.0034299069140330853, 'loss': 0.01057072199883882, 'time_step': 0.0036590084972151792, 'init_value': -2.557237386703491, 'ave_value': -1.6265913676713837, 'soft_opc': nan} step=4814




2022-04-20 18:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.59 [info     ] FQE_20220420185038: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016040256224482893, 'time_algorithm_update': 0.003525726766471403, 'loss': 0.010906891710074416, 'time_step': 0.0037558696356164403, 'init_value': -2.615394353866577, 'ave_value': -1.6440976876526365, 'soft_opc': nan} step=4980




2022-04-20 18:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:50.59 [info     ] FQE_20220420185038: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015777134033570807, 'time_algorithm_update': 0.0034301209162516766, 'loss': 0.012588341594168759, 'time_step': 0.0036617933985698655, 'init_value': -2.680332899093628, 'ave_value': -1.684552482983752, 'soft_opc': nan} step=5146




2022-04-20 18:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.00 [info     ] FQE_20220420185038: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001567774508372847, 'time_algorithm_update': 0.003577535410961473, 'loss': 0.012599048736303237, 'time_step': 0.0038065996514745504, 'init_value': -2.757279634475708, 'ave_value': -1.702737726943215, 'soft_opc': nan} step=5312




2022-04-20 18:51.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.01 [info     ] FQE_20220420185038: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015640546040362622, 'time_algorithm_update': 0.0033280102603406793, 'loss': 0.013132744558514601, 'time_step': 0.0035535312560667476, 'init_value': -2.820415496826172, 'ave_value': -1.7115411301332248, 'soft_opc': nan} step=5478




2022-04-20 18:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.01 [info     ] FQE_20220420185038: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001593569675123835, 'time_algorithm_update': 0.003567115370049534, 'loss': 0.014287738668092763, 'time_step': 0.003796959497842444, 'init_value': -2.974418878555298, 'ave_value': -1.8045073375634446, 'soft_opc': nan} step=5644




2022-04-20 18:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.02 [info     ] FQE_20220420185038: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015968012522502118, 'time_algorithm_update': 0.003409984600113099, 'loss': 0.015385475687834108, 'time_step': 0.0036401145429496304, 'init_value': -3.038404703140259, 'ave_value': -1.8366612654078651, 'soft_opc': nan} step=5810




2022-04-20 18:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.03 [info     ] FQE_20220420185038: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016044277742684605, 'time_algorithm_update': 0.0036144328404621906, 'loss': 0.01621735521174227, 'time_step': 0.0038507070886083395, 'init_value': -3.0690419673919678, 'ave_value': -1.830731862036696, 'soft_opc': nan} step=5976




2022-04-20 18:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.04 [info     ] FQE_20220420185038: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001628901585038886, 'time_algorithm_update': 0.0034700818808681994, 'loss': 0.017006471600938947, 'time_step': 0.0037092315145285733, 'init_value': -3.2380571365356445, 'ave_value': -1.918209220128352, 'soft_opc': nan} step=6142




2022-04-20 18:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.04 [info     ] FQE_20220420185038: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015863309423607517, 'time_algorithm_update': 0.0035417611340442337, 'loss': 0.017895992639816803, 'time_step': 0.0037699320230139307, 'init_value': -3.2850871086120605, 'ave_value': -1.900830601959548, 'soft_opc': nan} step=6308




2022-04-20 18:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.05 [info     ] FQE_20220420185038: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015655770359269107, 'time_algorithm_update': 0.003479344299040645, 'loss': 0.01987274669332385, 'time_step': 0.0037069507392056018, 'init_value': -3.4970552921295166, 'ave_value': -2.088038979262231, 'soft_opc': nan} step=6474




2022-04-20 18:51.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.06 [info     ] FQE_20220420185038: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016004924314567842, 'time_algorithm_update': 0.0035141103238944546, 'loss': 0.020744213975365668, 'time_step': 0.003745284425206931, 'init_value': -3.598240852355957, 'ave_value': -2.1008093710325926, 'soft_opc': nan} step=6640




2022-04-20 18:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.06 [info     ] FQE_20220420185038: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016259285340826195, 'time_algorithm_update': 0.00348893418369523, 'loss': 0.022075869781890863, 'time_step': 0.003724691379501159, 'init_value': -3.6827750205993652, 'ave_value': -2.1548530959599725, 'soft_opc': nan} step=6806




2022-04-20 18:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.07 [info     ] FQE_20220420185038: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001602072313607457, 'time_algorithm_update': 0.0035672762307776026, 'loss': 0.023209049499661, 'time_step': 0.003799933985055211, 'init_value': -3.797976016998291, 'ave_value': -2.208598069556326, 'soft_opc': nan} step=6972




2022-04-20 18:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.08 [info     ] FQE_20220420185038: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015544604106121753, 'time_algorithm_update': 0.0033668035484222046, 'loss': 0.024284709450446564, 'time_step': 0.0035896732146481433, 'init_value': -3.9745423793792725, 'ave_value': -2.3209362514396745, 'soft_opc': nan} step=7138




2022-04-20 18:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.08 [info     ] FQE_20220420185038: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016668187566550382, 'time_algorithm_update': 0.003634751561176346, 'loss': 0.02529289776246142, 'time_step': 0.0038750832339367234, 'init_value': -4.028508186340332, 'ave_value': -2.349978182803806, 'soft_opc': nan} step=7304




2022-04-20 18:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.09 [info     ] FQE_20220420185038: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015967294394251812, 'time_algorithm_update': 0.0034577530550669476, 'loss': 0.026370856182997574, 'time_step': 0.0036856510553015284, 'init_value': -4.1145853996276855, 'ave_value': -2.370962773412802, 'soft_opc': nan} step=7470




2022-04-20 18:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.10 [info     ] FQE_20220420185038: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016093110463705408, 'time_algorithm_update': 0.003493703991533762, 'loss': 0.027504995236655765, 'time_step': 0.0037285146943057874, 'init_value': -4.217879772186279, 'ave_value': -2.4057858475022487, 'soft_opc': nan} step=7636




2022-04-20 18:51.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.10 [info     ] FQE_20220420185038: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001605203352778791, 'time_algorithm_update': 0.003371630806520761, 'loss': 0.029349403210399472, 'time_step': 0.003603668097990105, 'init_value': -4.3849287033081055, 'ave_value': -2.515907946571305, 'soft_opc': nan} step=7802




2022-04-20 18:51.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.11 [info     ] FQE_20220420185038: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015882842512015836, 'time_algorithm_update': 0.003524155501859734, 'loss': 0.03181182263988486, 'time_step': 0.0037581662097609186, 'init_value': -4.510931491851807, 'ave_value': -2.5902993239575647, 'soft_opc': nan} step=7968




2022-04-20 18:51.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.12 [info     ] FQE_20220420185038: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.000160409743527332, 'time_algorithm_update': 0.003545173679489687, 'loss': 0.032293564468869065, 'time_step': 0.0037821430757821323, 'init_value': -4.578798294067383, 'ave_value': -2.534427948750824, 'soft_opc': nan} step=8134




2022-04-20 18:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:51.12 [info     ] FQE_20220420185038: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001605447516383895, 'time_algorithm_update': 0.003511127219142684, 'loss': 0.0347814583163484, 'time_step': 0.003744356603507536, 'init_value': -4.718873500823975, 'ave_value': -2.635885237418283, 'soft_opc': nan} step=8300




2022-04-20 18:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185038/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 18:51.13 [info     ] Directory is created at d3rlpy_logs/FQE_20220420185113
2022-04-20 18:51.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:51.13 [debug    ] Building models...
2022-04-20 18:51.13 [debug    ] Models have been built.
2022-04-20 18:51.13 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420185113/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:51.14 [info     ] FQE_20220420185113: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016184324441954147, 'time_algorithm_update': 0.003479543120362038, 'loss': 0.02165552312887252, 'time_step': 0.0037158429622650146, 'init_value': -1.200583815574646, 'ave_value': -1.2116039031999069, 'soft_opc': nan} step=344




2022-04-20 18:51.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.16 [info     ] FQE_20220420185113: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015882905139479527, 'time_algorithm_update': 0.003509031478748765, 'loss': 0.02109887398411195, 'time_step': 0.0037399960118670796, 'init_value': -2.1391491889953613, 'ave_value': -2.1299487446208256, 'soft_opc': nan} step=688




2022-04-20 18:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.17 [info     ] FQE_20220420185113: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001637880192246548, 'time_algorithm_update': 0.003495853307635285, 'loss': 0.025009509062307866, 'time_step': 0.003735459128091502, 'init_value': -3.277808666229248, 'ave_value': -3.2552015791336695, 'soft_opc': nan} step=1032




2022-04-20 18:51.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.18 [info     ] FQE_20220420185113: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016122016795845918, 'time_algorithm_update': 0.0034844695135604503, 'loss': 0.026987828723111645, 'time_step': 0.003718728935995767, 'init_value': -4.122800827026367, 'ave_value': -4.122609229721465, 'soft_opc': nan} step=1376




2022-04-20 18:51.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.20 [info     ] FQE_20220420185113: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016048827836679858, 'time_algorithm_update': 0.0035252938436907393, 'loss': 0.03362141836835288, 'time_step': 0.0037589974181596623, 'init_value': -5.194838523864746, 'ave_value': -5.226969131800505, 'soft_opc': nan} step=1720




2022-04-20 18:51.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.21 [info     ] FQE_20220420185113: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001629438511160917, 'time_algorithm_update': 0.0034362694551778395, 'loss': 0.03866420455580188, 'time_step': 0.003675704778626908, 'init_value': -5.660180568695068, 'ave_value': -5.755615827817101, 'soft_opc': nan} step=2064




2022-04-20 18:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.23 [info     ] FQE_20220420185113: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001611744248589804, 'time_algorithm_update': 0.003526604452798533, 'loss': 0.04693027119812837, 'time_step': 0.003765264915865521, 'init_value': -6.362447738647461, 'ave_value': -6.609528782131435, 'soft_opc': nan} step=2408




2022-04-20 18:51.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.24 [info     ] FQE_20220420185113: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016568081323490588, 'time_algorithm_update': 0.0034747089064398476, 'loss': 0.05570174752041524, 'time_step': 0.0037161084108574445, 'init_value': -6.811062812805176, 'ave_value': -7.2082766958185145, 'soft_opc': nan} step=2752




2022-04-20 18:51.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.25 [info     ] FQE_20220420185113: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001625432524570199, 'time_algorithm_update': 0.0034876299458880757, 'loss': 0.06743302418536312, 'time_step': 0.003727486660314161, 'init_value': -7.3022661209106445, 'ave_value': -7.909891514278747, 'soft_opc': nan} step=3096




2022-04-20 18:51.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.27 [info     ] FQE_20220420185113: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016192156215046727, 'time_algorithm_update': 0.0035277930803077166, 'loss': 0.08184848319425053, 'time_step': 0.0037672422653020816, 'init_value': -7.662656307220459, 'ave_value': -8.5652100583723, 'soft_opc': nan} step=3440




2022-04-20 18:51.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.28 [info     ] FQE_20220420185113: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016445406647615655, 'time_algorithm_update': 0.003486628449240396, 'loss': 0.08997486233386363, 'time_step': 0.003721819367519645, 'init_value': -8.185286521911621, 'ave_value': -9.335834974050522, 'soft_opc': nan} step=3784




2022-04-20 18:51.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.30 [info     ] FQE_20220420185113: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016536823538846747, 'time_algorithm_update': 0.0035392953905948373, 'loss': 0.10753961417520809, 'time_step': 0.003777369510295779, 'init_value': -8.56151294708252, 'ave_value': -10.050946932950536, 'soft_opc': nan} step=4128




2022-04-20 18:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.31 [info     ] FQE_20220420185113: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016372494919355526, 'time_algorithm_update': 0.003514881743941196, 'loss': 0.1175740349409712, 'time_step': 0.003752135953237844, 'init_value': -9.04265022277832, 'ave_value': -10.754448182948002, 'soft_opc': nan} step=4472




2022-04-20 18:51.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.32 [info     ] FQE_20220420185113: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016154868658198866, 'time_algorithm_update': 0.0035686451335285984, 'loss': 0.1313461405712418, 'time_step': 0.0038002161092536395, 'init_value': -9.780332565307617, 'ave_value': -11.810904623420388, 'soft_opc': nan} step=4816




2022-04-20 18:51.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.34 [info     ] FQE_20220420185113: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016562120859013048, 'time_algorithm_update': 0.003523079461829607, 'loss': 0.14312349036769118, 'time_step': 0.0037660591824110164, 'init_value': -9.953184127807617, 'ave_value': -12.171782040166425, 'soft_opc': nan} step=5160




2022-04-20 18:51.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.35 [info     ] FQE_20220420185113: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016414564709330714, 'time_algorithm_update': 0.00350654263829076, 'loss': 0.15622032261561863, 'time_step': 0.0037452633990797887, 'init_value': -10.727689743041992, 'ave_value': -13.21437666292663, 'soft_opc': nan} step=5504




2022-04-20 18:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.37 [info     ] FQE_20220420185113: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016577853712924692, 'time_algorithm_update': 0.003444813711698665, 'loss': 0.17537231831594782, 'time_step': 0.003682737433633139, 'init_value': -10.606637001037598, 'ave_value': -13.372888643354983, 'soft_opc': nan} step=5848




2022-04-20 18:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.38 [info     ] FQE_20220420185113: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001623360223548357, 'time_algorithm_update': 0.003521716178849686, 'loss': 0.18109420688043154, 'time_step': 0.003761629032534222, 'init_value': -10.922683715820312, 'ave_value': -13.937288388636736, 'soft_opc': nan} step=6192




2022-04-20 18:51.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.39 [info     ] FQE_20220420185113: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001620504745217257, 'time_algorithm_update': 0.003465809101282164, 'loss': 0.19372241571004134, 'time_step': 0.0037030847959740217, 'init_value': -11.196762084960938, 'ave_value': -14.433590412139893, 'soft_opc': nan} step=6536




2022-04-20 18:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.41 [info     ] FQE_20220420185113: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001647312973820886, 'time_algorithm_update': 0.0035060761972915294, 'loss': 0.203911627713201, 'time_step': 0.0037430282249007116, 'init_value': -11.361551284790039, 'ave_value': -14.91597826948037, 'soft_opc': nan} step=6880




2022-04-20 18:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.42 [info     ] FQE_20220420185113: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001645774342292963, 'time_algorithm_update': 0.003457580887994101, 'loss': 0.2124552942276391, 'time_step': 0.003698170878166376, 'init_value': -11.503512382507324, 'ave_value': -15.458869036871034, 'soft_opc': nan} step=7224




2022-04-20 18:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.44 [info     ] FQE_20220420185113: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001634220744288245, 'time_algorithm_update': 0.003477571315543596, 'loss': 0.22077505652749435, 'time_step': 0.003716633763424186, 'init_value': -11.957517623901367, 'ave_value': -16.026549493219402, 'soft_opc': nan} step=7568




2022-04-20 18:51.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.45 [info     ] FQE_20220420185113: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016319127969963607, 'time_algorithm_update': 0.0034786961799444156, 'loss': 0.23132169647900344, 'time_step': 0.0037121918312338895, 'init_value': -12.392790794372559, 'ave_value': -16.760390458316415, 'soft_opc': nan} step=7912




2022-04-20 18:51.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.46 [info     ] FQE_20220420185113: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001635398975638456, 'time_algorithm_update': 0.003528872201609057, 'loss': 0.24026842886408747, 'time_step': 0.00377047200535619, 'init_value': -12.391881942749023, 'ave_value': -16.94883498538722, 'soft_opc': nan} step=8256




2022-04-20 18:51.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.48 [info     ] FQE_20220420185113: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016363900761271632, 'time_algorithm_update': 0.003479493911876235, 'loss': 0.2483559419904554, 'time_step': 0.003718171701874844, 'init_value': -12.441657066345215, 'ave_value': -17.2209165891817, 'soft_opc': nan} step=8600




2022-04-20 18:51.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.49 [info     ] FQE_20220420185113: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016120076179504395, 'time_algorithm_update': 0.003479712924291921, 'loss': 0.258382678102312, 'time_step': 0.003712470448294351, 'init_value': -12.854059219360352, 'ave_value': -17.917030747489886, 'soft_opc': nan} step=8944




2022-04-20 18:51.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.51 [info     ] FQE_20220420185113: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016947471818258596, 'time_algorithm_update': 0.003580997156542401, 'loss': 0.2571319500471704, 'time_step': 0.0038291326788968817, 'init_value': -12.722275733947754, 'ave_value': -17.949143230216997, 'soft_opc': nan} step=9288




2022-04-20 18:51.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.52 [info     ] FQE_20220420185113: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016280870104944982, 'time_algorithm_update': 0.003453373908996582, 'loss': 0.26727222536993755, 'time_step': 0.0036904444528180497, 'init_value': -12.732423782348633, 'ave_value': -18.34957823449964, 'soft_opc': nan} step=9632




2022-04-20 18:51.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.53 [info     ] FQE_20220420185113: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016710993855498558, 'time_algorithm_update': 0.0034867746885432756, 'loss': 0.2668370564013373, 'time_step': 0.0037311523459678474, 'init_value': -13.124631881713867, 'ave_value': -18.98176126673415, 'soft_opc': nan} step=9976




2022-04-20 18:51.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.55 [info     ] FQE_20220420185113: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016455109729323277, 'time_algorithm_update': 0.0034808828387149545, 'loss': 0.2758925805778004, 'time_step': 0.003718963889188545, 'init_value': -13.307394027709961, 'ave_value': -19.512076793382835, 'soft_opc': nan} step=10320




2022-04-20 18:51.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.56 [info     ] FQE_20220420185113: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016621586888335472, 'time_algorithm_update': 0.003567285315935002, 'loss': 0.28650737900915013, 'time_step': 0.0038067629170972245, 'init_value': -13.379345893859863, 'ave_value': -19.875599033392227, 'soft_opc': nan} step=10664




2022-04-20 18:51.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.58 [info     ] FQE_20220420185113: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016475555508635765, 'time_algorithm_update': 0.0035464507202769436, 'loss': 0.28952640732262974, 'time_step': 0.0037868119949518247, 'init_value': -13.130083084106445, 'ave_value': -19.865263483438405, 'soft_opc': nan} step=11008




2022-04-20 18:51.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:51.59 [info     ] FQE_20220420185113: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016795410666354868, 'time_algorithm_update': 0.003487993118374847, 'loss': 0.301571452362073, 'time_step': 0.0037318939386412156, 'init_value': -13.354532241821289, 'ave_value': -20.30672594716957, 'soft_opc': nan} step=11352




2022-04-20 18:51.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.00 [info     ] FQE_20220420185113: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001644117887630019, 'time_algorithm_update': 0.0032232147316600002, 'loss': 0.3101956106689867, 'time_step': 0.003462817779807157, 'init_value': -13.386990547180176, 'ave_value': -20.627141119257825, 'soft_opc': nan} step=11696




2022-04-20 18:52.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.01 [info     ] FQE_20220420185113: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001566444718560507, 'time_algorithm_update': 0.0021089294622110765, 'loss': 0.3234879748088979, 'time_step': 0.0023377260496450026, 'init_value': -13.209318161010742, 'ave_value': -20.75689891195512, 'soft_opc': nan} step=12040




2022-04-20 18:52.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.02 [info     ] FQE_20220420185113: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015667566033296807, 'time_algorithm_update': 0.002129016920577648, 'loss': 0.33004921511205476, 'time_step': 0.0023568508236907247, 'init_value': -13.323651313781738, 'ave_value': -21.224515528695004, 'soft_opc': nan} step=12384




2022-04-20 18:52.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.03 [info     ] FQE_20220420185113: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015660565952922023, 'time_algorithm_update': 0.0020922263001286706, 'loss': 0.35521501680662815, 'time_step': 0.0023199493108793747, 'init_value': -13.763914108276367, 'ave_value': -21.847496857997534, 'soft_opc': nan} step=12728




2022-04-20 18:52.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.04 [info     ] FQE_20220420185113: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001574720061102579, 'time_algorithm_update': 0.0021079411340314286, 'loss': 0.3767611960999581, 'time_step': 0.002337270697882009, 'init_value': -13.648574829101562, 'ave_value': -22.032816135963877, 'soft_opc': nan} step=13072




2022-04-20 18:52.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.05 [info     ] FQE_20220420185113: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001587354859640432, 'time_algorithm_update': 0.0021288055320118748, 'loss': 0.3842144680566826, 'time_step': 0.0023577774679938027, 'init_value': -14.127091407775879, 'ave_value': -22.70798590167149, 'soft_opc': nan} step=13416




2022-04-20 18:52.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.06 [info     ] FQE_20220420185113: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015818518261576808, 'time_algorithm_update': 0.0021213445552559786, 'loss': 0.39992636170368207, 'time_step': 0.002350685208342796, 'init_value': -13.486288070678711, 'ave_value': -22.238527536888917, 'soft_opc': nan} step=13760




2022-04-20 18:52.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.07 [info     ] FQE_20220420185113: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001539199851280035, 'time_algorithm_update': 0.002105809921442076, 'loss': 0.40544297019301284, 'time_step': 0.0023321862830672155, 'init_value': -13.795228958129883, 'ave_value': -22.72574847728551, 'soft_opc': nan} step=14104




2022-04-20 18:52.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.07 [info     ] FQE_20220420185113: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015707556591477504, 'time_algorithm_update': 0.0020963348621545834, 'loss': 0.424086268422692, 'time_step': 0.002328655054402906, 'init_value': -14.001521110534668, 'ave_value': -23.11669062386762, 'soft_opc': nan} step=14448




2022-04-20 18:52.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.08 [info     ] FQE_20220420185113: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015770349391671114, 'time_algorithm_update': 0.0021308168422344118, 'loss': 0.4426393124144958, 'time_step': 0.0023594013480252997, 'init_value': -14.112651824951172, 'ave_value': -23.30826708738197, 'soft_opc': nan} step=14792




2022-04-20 18:52.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.09 [info     ] FQE_20220420185113: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015638387480447458, 'time_algorithm_update': 0.0021004628303439117, 'loss': 0.45360748098383463, 'time_step': 0.0023300120996874434, 'init_value': -14.109654426574707, 'ave_value': -23.65414638347454, 'soft_opc': nan} step=15136




2022-04-20 18:52.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.10 [info     ] FQE_20220420185113: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001532781955807708, 'time_algorithm_update': 0.0020542428937069205, 'loss': 0.4723322127545036, 'time_step': 0.002279908851135609, 'init_value': -14.461003303527832, 'ave_value': -24.02151262801778, 'soft_opc': nan} step=15480




2022-04-20 18:52.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.11 [info     ] FQE_20220420185113: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015771735546200774, 'time_algorithm_update': 0.0020814884540646577, 'loss': 0.48633482411157253, 'time_step': 0.002309078393980514, 'init_value': -14.781904220581055, 'ave_value': -24.618040598855092, 'soft_opc': nan} step=15824




2022-04-20 18:52.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.12 [info     ] FQE_20220420185113: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001546428647152213, 'time_algorithm_update': 0.002095105343086775, 'loss': 0.509362498531118, 'time_step': 0.0023219793341880623, 'init_value': -14.7559175491333, 'ave_value': -24.68368781524467, 'soft_opc': nan} step=16168




2022-04-20 18:52.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.13 [info     ] FQE_20220420185113: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015531168427578238, 'time_algorithm_update': 0.0020583715549735136, 'loss': 0.5248065222390429, 'time_step': 0.0022856620855109637, 'init_value': -14.770401000976562, 'ave_value': -24.85930283028666, 'soft_opc': nan} step=16512




2022-04-20 18:52.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.14 [info     ] FQE_20220420185113: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015518623729084813, 'time_algorithm_update': 0.0020913627258566923, 'loss': 0.5388373703706663, 'time_step': 0.002317908891411715, 'init_value': -14.957907676696777, 'ave_value': -25.116249942316397, 'soft_opc': nan} step=16856




2022-04-20 18:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:52.14 [info     ] FQE_20220420185113: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015509890955547954, 'time_algorithm_update': 0.0020819410335185915, 'loss': 0.5653583526037373, 'time_step': 0.0023091546324796454, 'init_value': -15.286438941955566, 'ave_value': -25.47074169271567, 'soft_opc': nan} step=17200




2022-04-20 18:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185113/model_17200.pt
search iteration:  22
using hyper params:  [0.0038369999618399235, 0.00975765688829244, 4.103893594210427e-05, 3]
2022-04-20 18:52.14 [debug    ] RoundIterator is selected.
2022-04-20 18:52.14 [info     ] Directory is created at d3rlpy_logs/CQL_20220420185214
2022-04-20 18:52.14 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:52.14 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:52.14 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420185214/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0038369999618399235, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.21 [info     ] CQL_20220420185214: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003343625375401904, 'time_algorithm_update': 0.017822065548590053, 'temp_loss': 4.57142816788969, 'temp': 0.9930694601689166, 'alpha_loss': -12.365806762237995, 'alpha': 1.0153031628034268, 'critic_loss': 23.403340147252667, 'actor_loss': 2.2286443392697133, 'time_step': 0.01825156825327734, 'td_error': 5.408149650574013, 'init_value': -7.271858215332031, 'ave_value': -3.893506821303687} step=342
2022-04-20 18:52.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.27 [info     ] CQL_20220420185214: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00033027874796014086, 'time_algorithm_update': 0.01771993664970175, 'temp_loss': 3.358252111234163, 'temp': 0.9807382968434116, 'alpha_loss': -2.8956929806303995, 'alpha': 1.0335323211045293, 'critic_loss': 23.374754347996404, 'actor_loss': 6.8901284514812, 'time_step': 0.018148208222194026, 'td_error': 7.551042732828749, 'init_value': -12.828165054321289, 'ave_value': -6.85225768578415} step=684
2022-04-20 18:52.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.34 [info     ] CQL_20220420185214: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003287443640636422, 'time_algorithm_update': 0.017698966271696034, 'temp_loss': 2.4209507815321984, 'temp': 0.9706660143116064, 'alpha_loss': 2.660157753067494, 'alpha': 1.0328304115791767, 'critic_loss': 50.326276293972086, 'actor_loss': 11.400149130681802, 'time_step': 0.018127251786795275, 'td_error': 10.396714993889685, 'init_value': -20.16470718383789, 'ave_value': -9.793406938143947} step=1026
2022-04-20 18:52.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.40 [info     ] CQL_20220420185214: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003262256321154143, 'time_algorithm_update': 0.01763125609236154, 'temp_loss': 1.8337991903399864, 'temp': 0.9620187925665002, 'alpha_loss': 6.378030521130701, 'alpha': 1.0091471337435538, 'critic_loss': 94.08786713449578, 'actor_loss': 16.30202441466482, 'time_step': 0.018055470366227, 'td_error': 16.594334917597685, 'init_value': -27.296688079833984, 'ave_value': -13.855968229020233} step=1368
2022-04-20 18:52.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.47 [info     ] CQL_20220420185214: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00032660068824277286, 'time_algorithm_update': 0.017722516729120622, 'temp_loss': 1.3526583807161676, 'temp': 0.9544376596721292, 'alpha_loss': 9.008418762195877, 'alpha': 0.9707902108716686, 'critic_loss': 144.9928280791344, 'actor_loss': 21.199624951122797, 'time_step': 0.01814583588761893, 'td_error': 27.812047161690266, 'init_value': -33.98271942138672, 'ave_value': -18.06271413613614} step=1710
2022-04-20 18:52.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:52.53 [info     ] CQL_20220420185214: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003291403341014483, 'time_algorithm_update': 0.017702690342016388, 'temp_loss': 1.0370036034643302, 'temp': 0.9479434364720395, 'alpha_loss': 10.48439263600355, 'alpha': 0.9293399742472241, 'critic_loss': 205.07906341552734, 'actor_loss': 26.356882039566486, 'time_step': 0.0181290943023057, 'td_error': 27.127313306987315, 'init_value': -40.84696578979492, 'ave_value': -21.645180777817426} step=2052
2022-04-20 18:52.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.00 [info     ] CQL_20220420185214: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003333091735839844, 'time_algorithm_update': 0.01779763949544806, 'temp_loss': 0.7090041407585492, 'temp': 0.9422884836182956, 'alpha_loss': 11.362479014703405, 'alpha': 0.8905089091487796, 'critic_loss': 266.0577209204958, 'actor_loss': 31.06493408359282, 'time_step': 0.01823104613008555, 'td_error': 34.629015793825786, 'init_value': -53.442840576171875, 'ave_value': -28.08922398282198} step=2394
2022-04-20 18:53.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.06 [info     ] CQL_20220420185214: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00032647241625869485, 'time_algorithm_update': 0.01778564815632781, 'temp_loss': 0.5415483280711355, 'temp': 0.9377607398214396, 'alpha_loss': 11.856818807055378, 'alpha': 0.8547465287105381, 'critic_loss': 331.2313401071649, 'actor_loss': 36.0306078314084, 'time_step': 0.018210835624159427, 'td_error': 40.36083284380864, 'init_value': -59.518089294433594, 'ave_value': -31.893330532823864} step=2736
2022-04-20 18:53.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.13 [info     ] CQL_20220420185214: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00032583663338109066, 'time_algorithm_update': 0.017801909418831096, 'temp_loss': 0.35318574379062095, 'temp': 0.9338136768480491, 'alpha_loss': 11.858074508912383, 'alpha': 0.8223367631435394, 'critic_loss': 385.953739723964, 'actor_loss': 40.25842479795043, 'time_step': 0.018228152342009963, 'td_error': 53.430641092130934, 'init_value': -69.5887451171875, 'ave_value': -35.428763326962255} step=3078
2022-04-20 18:53.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.19 [info     ] CQL_20220420185214: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00033346602791234066, 'time_algorithm_update': 0.01788357196495547, 'temp_loss': 0.18073880143071475, 'temp': 0.931124786884464, 'alpha_loss': 12.373109248646518, 'alpha': 0.7919178158916228, 'critic_loss': 443.8553440919396, 'actor_loss': 44.811578360217354, 'time_step': 0.01831586946520889, 'td_error': 84.10302657429862, 'init_value': -78.14106750488281, 'ave_value': -39.64031944613803} step=3420
2022-04-20 18:53.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.25 [info     ] CQL_20220420185214: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00033034706673426936, 'time_algorithm_update': 0.017794679479989393, 'temp_loss': 0.11118057631609733, 'temp': 0.9293860657173291, 'alpha_loss': 12.349775819053427, 'alpha': 0.7630659378411477, 'critic_loss': 508.97579902515076, 'actor_loss': 49.32155332509537, 'time_step': 0.018221180341397112, 'td_error': 83.91753221187378, 'init_value': -86.82561492919922, 'ave_value': -43.969651403313016} step=3762
2022-04-20 18:53.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.32 [info     ] CQL_20220420185214: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00033199090009544327, 'time_algorithm_update': 0.01799647152772424, 'temp_loss': -0.02495306539104173, 'temp': 0.9288912267364257, 'alpha_loss': 12.59386500141077, 'alpha': 0.7358282956114987, 'critic_loss': 572.857242249606, 'actor_loss': 53.71817431533546, 'time_step': 0.018426071133529932, 'td_error': 104.80644571559431, 'init_value': -95.92509460449219, 'ave_value': -48.505498124173094} step=4104
2022-04-20 18:53.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.39 [info     ] CQL_20220420185214: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003326901218347382, 'time_algorithm_update': 0.017902457226089567, 'temp_loss': -0.10187016999926309, 'temp': 0.9300334270586047, 'alpha_loss': 12.558147770619533, 'alpha': 0.7100204252011595, 'critic_loss': 629.575343260291, 'actor_loss': 57.704102298669646, 'time_step': 0.018334615997403686, 'td_error': 104.26823089740441, 'init_value': -105.17900085449219, 'ave_value': -50.75633066656823} step=4446
2022-04-20 18:53.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.45 [info     ] CQL_20220420185214: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00033028920491536457, 'time_algorithm_update': 0.017764195364121108, 'temp_loss': -0.09417823201034502, 'temp': 0.9318021861781851, 'alpha_loss': 12.453699077081959, 'alpha': 0.6856447857374336, 'critic_loss': 690.1361553348296, 'actor_loss': 61.736192714401156, 'time_step': 0.01819241395470692, 'td_error': 119.92524829395103, 'init_value': -110.1553726196289, 'ave_value': -53.69408468823712} step=4788
2022-04-20 18:53.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.51 [info     ] CQL_20220420185214: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003275264773452491, 'time_algorithm_update': 0.01768833433675487, 'temp_loss': -0.1748579055811578, 'temp': 0.9345015128802138, 'alpha_loss': 12.602079203254299, 'alpha': 0.6623767450538992, 'critic_loss': 740.7432468704313, 'actor_loss': 65.37364801328782, 'time_step': 0.018115678028753628, 'td_error': 157.64190425002852, 'init_value': -118.1373062133789, 'ave_value': -56.9160539317809} step=5130
2022-04-20 18:53.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:53.58 [info     ] CQL_20220420185214: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00032888867004572995, 'time_algorithm_update': 0.01764989387222201, 'temp_loss': -0.22942928095723977, 'temp': 0.9391055211686251, 'alpha_loss': 12.777434316992062, 'alpha': 0.6398571260839875, 'critic_loss': 784.3565122370135, 'actor_loss': 68.60894971702531, 'time_step': 0.018076799069231715, 'td_error': 213.95841518712297, 'init_value': -128.60964965820312, 'ave_value': -61.022579624332685} step=5472
2022-04-20 18:53.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.04 [info     ] CQL_20220420185214: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00032875830667060716, 'time_algorithm_update': 0.017802432266592283, 'temp_loss': -0.27823097229940674, 'temp': 0.9460608105213322, 'alpha_loss': 12.604386707495527, 'alpha': 0.6180368660137667, 'critic_loss': 825.2885319224575, 'actor_loss': 71.6895347617523, 'time_step': 0.018230570687188044, 'td_error': 281.224516308401, 'init_value': -136.87033081054688, 'ave_value': -64.407832602237} step=5814
2022-04-20 18:54.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.11 [info     ] CQL_20220420185214: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00032698829271639995, 'time_algorithm_update': 0.01775818679764954, 'temp_loss': -0.2967754218465926, 'temp': 0.953940294639409, 'alpha_loss': 13.230134542922528, 'alpha': 0.5969732612894293, 'critic_loss': 867.106476722405, 'actor_loss': 74.79272582517032, 'time_step': 0.01817962788698966, 'td_error': 353.41651312829373, 'init_value': -143.78201293945312, 'ave_value': -66.42669521241038} step=6156
2022-04-20 18:54.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.17 [info     ] CQL_20220420185214: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003253716474388078, 'time_algorithm_update': 0.017729218243158353, 'temp_loss': -0.27073227222028534, 'temp': 0.9621823348148525, 'alpha_loss': 13.078856285552533, 'alpha': 0.5764921030454468, 'critic_loss': 917.6348052443119, 'actor_loss': 78.0603032809252, 'time_step': 0.01815046831878305, 'td_error': 470.42479901674716, 'init_value': -148.8442840576172, 'ave_value': -66.78975191717548} step=6498
2022-04-20 18:54.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.24 [info     ] CQL_20220420185214: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003282584642109118, 'time_algorithm_update': 0.017824713249652707, 'temp_loss': -0.28396586132676976, 'temp': 0.9701342615816329, 'alpha_loss': 13.105273126858718, 'alpha': 0.5571447619569232, 'critic_loss': 955.764272767898, 'actor_loss': 80.63323129129688, 'time_step': 0.018249343013205722, 'td_error': 497.4587181564477, 'init_value': -158.0897674560547, 'ave_value': -70.21532090599673} step=6840
2022-04-20 18:54.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.30 [info     ] CQL_20220420185214: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00033021530909845004, 'time_algorithm_update': 0.01774272862930744, 'temp_loss': -0.3118761247078403, 'temp': 0.9791408433900242, 'alpha_loss': 13.599945025137293, 'alpha': 0.5380939130197492, 'critic_loss': 1000.0747810943782, 'actor_loss': 83.99745805082266, 'time_step': 0.01816963940335993, 'td_error': 144.75700458451504, 'init_value': -165.65908813476562, 'ave_value': -73.2474658301325} step=7182
2022-04-20 18:54.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.37 [info     ] CQL_20220420185214: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003279726407681292, 'time_algorithm_update': 0.017738814939532364, 'temp_loss': -0.30813550035746995, 'temp': 0.9885126984955972, 'alpha_loss': 13.373242743531165, 'alpha': 0.5201845745942746, 'critic_loss': 1049.5701120833905, 'actor_loss': 87.1787543045847, 'time_step': 0.01816252449102569, 'td_error': 706.9101244448547, 'init_value': -181.80648803710938, 'ave_value': -76.90233000629188} step=7524
2022-04-20 18:54.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.43 [info     ] CQL_20220420185214: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003250955838209007, 'time_algorithm_update': 0.017621950796473097, 'temp_loss': -0.2377574219646161, 'temp': 0.9971169506945805, 'alpha_loss': 12.990299822991354, 'alpha': 0.5026775351044728, 'critic_loss': 1093.984416047035, 'actor_loss': 89.72883960656952, 'time_step': 0.018046133699472885, 'td_error': 724.1857695895166, 'init_value': -186.43316650390625, 'ave_value': -76.2001384166165} step=7866
2022-04-20 18:54.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.50 [info     ] CQL_20220420185214: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00033100934056510704, 'time_algorithm_update': 0.017773518785398606, 'temp_loss': -0.17340152554366497, 'temp': 1.003390291629479, 'alpha_loss': 11.925881006564314, 'alpha': 0.4873082771984457, 'critic_loss': 1140.9178593507288, 'actor_loss': 92.26185904608832, 'time_step': 0.018204672991880895, 'td_error': 561.282523049429, 'init_value': -198.45565795898438, 'ave_value': -80.67788136596683} step=8208
2022-04-20 18:54.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:54.56 [info     ] CQL_20220420185214: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003237884644179316, 'time_algorithm_update': 0.01756111711089374, 'temp_loss': -0.16207540235672777, 'temp': 1.0090050749611437, 'alpha_loss': 11.169406982890347, 'alpha': 0.4727142230111953, 'critic_loss': 1167.325820833619, 'actor_loss': 94.0433394448799, 'time_step': 0.01798012033540603, 'td_error': 846.2328335671075, 'init_value': -204.78805541992188, 'ave_value': -81.03357153175382} step=8550
2022-04-20 18:54.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.02 [info     ] CQL_20220420185214: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032668504101491115, 'time_algorithm_update': 0.017613895455299067, 'temp_loss': -0.09340036555863264, 'temp': 1.0132480417078698, 'alpha_loss': 11.424646947816102, 'alpha': 0.45862984047298544, 'critic_loss': 1191.264325415182, 'actor_loss': 95.9666523849755, 'time_step': 0.018038018405088903, 'td_error': 1180.1397379397206, 'init_value': -214.6997528076172, 'ave_value': -85.59310028895467} step=8892
2022-04-20 18:55.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.09 [info     ] CQL_20220420185214: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003258317534686529, 'time_algorithm_update': 0.017577007500051756, 'temp_loss': -0.11568279753912959, 'temp': 1.0172819205892016, 'alpha_loss': 9.915374969181261, 'alpha': 0.44503826024936655, 'critic_loss': 1211.7760673656799, 'actor_loss': 97.64228823031598, 'time_step': 0.018001923784177903, 'td_error': 126.88990995813738, 'init_value': -208.8860626220703, 'ave_value': -84.86758664994962} step=9234
2022-04-20 18:55.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.15 [info     ] CQL_20220420185214: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003280458394546955, 'time_algorithm_update': 0.017679552585757964, 'temp_loss': -0.07406777763270844, 'temp': 1.0205296908902843, 'alpha_loss': 8.10734024033909, 'alpha': 0.43437982947505704, 'critic_loss': 1220.5918987787259, 'actor_loss': 98.5983739038657, 'time_step': 0.018105339585689075, 'td_error': 118.18236042669051, 'init_value': -217.7913360595703, 'ave_value': -86.0253600035346} step=9576
2022-04-20 18:55.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.22 [info     ] CQL_20220420185214: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003349948347660533, 'time_algorithm_update': 0.017978951247812014, 'temp_loss': -0.06702253455568476, 'temp': 1.0235917449694627, 'alpha_loss': 6.9378856315947415, 'alpha': 0.42429173872833365, 'critic_loss': 1228.6178572805304, 'actor_loss': 99.17169113605343, 'time_step': 0.018410640850401762, 'td_error': 107.27889837753325, 'init_value': -218.66421508789062, 'ave_value': -86.72127746475978} step=9918
2022-04-20 18:55.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.28 [info     ] CQL_20220420185214: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003270538229691355, 'time_algorithm_update': 0.017776786235340854, 'temp_loss': -0.02501834894132893, 'temp': 1.025284645153068, 'alpha_loss': 6.653881556806509, 'alpha': 0.4146645945763727, 'critic_loss': 1235.346488729555, 'actor_loss': 100.35123845150596, 'time_step': 0.018199857912565533, 'td_error': 91.91975297448083, 'init_value': -223.33309936523438, 'ave_value': -87.16852356589861} step=10260
2022-04-20 18:55.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.35 [info     ] CQL_20220420185214: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003275815506427609, 'time_algorithm_update': 0.017858027714734884, 'temp_loss': -0.03420881563198497, 'temp': 1.0260894333409984, 'alpha_loss': 6.574706098489594, 'alpha': 0.40467357801066506, 'critic_loss': 1242.3208775213586, 'actor_loss': 101.10508879722907, 'time_step': 0.01828209698548791, 'td_error': 94.93088031673301, 'init_value': -220.75283813476562, 'ave_value': -88.99894729103457} step=10602
2022-04-20 18:55.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.41 [info     ] CQL_20220420185214: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003325548785471777, 'time_algorithm_update': 0.017822542385748256, 'temp_loss': -0.008987133636286384, 'temp': 1.0266396295257478, 'alpha_loss': 6.436893649965699, 'alpha': 0.3943168620268504, 'critic_loss': 1251.8576044450726, 'actor_loss': 102.1145085340355, 'time_step': 0.018254736013579787, 'td_error': 91.53059834656477, 'init_value': -225.3622589111328, 'ave_value': -89.29854324899345} step=10944
2022-04-20 18:55.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.47 [info     ] CQL_20220420185214: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.000299416090312757, 'time_algorithm_update': 0.01594934686582688, 'temp_loss': -0.02610810587934235, 'temp': 1.0276768385318287, 'alpha_loss': 6.183462422493606, 'alpha': 0.3838275310240294, 'critic_loss': 1259.209746243661, 'actor_loss': 102.88948859945376, 'time_step': 0.01633808278200919, 'td_error': 91.4328236555675, 'init_value': -223.7528076171875, 'ave_value': -88.61022536100468} step=11286
2022-04-20 18:55.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.53 [info     ] CQL_20220420185214: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003285909953870271, 'time_algorithm_update': 0.017690759653236434, 'temp_loss': 0.003373380935592958, 'temp': 1.0274785635067007, 'alpha_loss': 6.167488266850076, 'alpha': 0.3732495704415249, 'critic_loss': 1272.5155893069261, 'actor_loss': 103.99498079534163, 'time_step': 0.01811742434027599, 'td_error': 87.77118197596995, 'init_value': -225.29574584960938, 'ave_value': -91.56248770599899} step=11628
2022-04-20 18:55.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:55.59 [info     ] CQL_20220420185214: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0002943967518053557, 'time_algorithm_update': 0.015782692976165236, 'temp_loss': 0.005518108617231163, 'temp': 1.0279296702808804, 'alpha_loss': 5.965609287657933, 'alpha': 0.3626022446050979, 'critic_loss': 1274.8681431820519, 'actor_loss': 104.67765324018156, 'time_step': 0.016161323290819315, 'td_error': 86.02732382710415, 'init_value': -219.59765625, 'ave_value': -89.44813136136881} step=11970
2022-04-20 18:55.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.06 [info     ] CQL_20220420185214: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000330853183367099, 'time_algorithm_update': 0.01777612326437967, 'temp_loss': 0.053449201005461966, 'temp': 1.026641387339921, 'alpha_loss': 5.794086955444158, 'alpha': 0.352191956768259, 'critic_loss': 1280.8320323207922, 'actor_loss': 105.19067405120671, 'time_step': 0.01820561132932964, 'td_error': 81.36806540021851, 'init_value': -222.90933227539062, 'ave_value': -90.79718780415016} step=12312
2022-04-20 18:56.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.12 [info     ] CQL_20220420185214: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003339965441073591, 'time_algorithm_update': 0.017966663628293758, 'temp_loss': 0.03809187098335453, 'temp': 1.024366349504705, 'alpha_loss': 5.669509296528777, 'alpha': 0.3419013118360475, 'critic_loss': 1285.8596716094435, 'actor_loss': 105.79477203101443, 'time_step': 0.018399912711472538, 'td_error': 80.86927906267694, 'init_value': -223.5310516357422, 'ave_value': -92.75177402308262} step=12654
2022-04-20 18:56.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.19 [info     ] CQL_20220420185214: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003303080274347673, 'time_algorithm_update': 0.017933251564962824, 'temp_loss': 0.047533215998470435, 'temp': 1.0229655549540158, 'alpha_loss': 5.522561129770781, 'alpha': 0.3316587033676125, 'critic_loss': 1289.0829614115041, 'actor_loss': 106.29207276461418, 'time_step': 0.018361399745383457, 'td_error': 84.93603167824752, 'init_value': -220.6627960205078, 'ave_value': -93.07949553400982} step=12996
2022-04-20 18:56.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.25 [info     ] CQL_20220420185214: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003334848504317434, 'time_algorithm_update': 0.017921108251426652, 'temp_loss': 0.073980152852049, 'temp': 1.0209395292906733, 'alpha_loss': 5.427761510798805, 'alpha': 0.32165576874861246, 'critic_loss': 1290.6293106525266, 'actor_loss': 106.58051197431242, 'time_step': 0.018354298775656183, 'td_error': 72.78824739014603, 'init_value': -217.10757446289062, 'ave_value': -92.36177513224852} step=13338
2022-04-20 18:56.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.32 [info     ] CQL_20220420185214: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003307758018984432, 'time_algorithm_update': 0.017988229355616878, 'temp_loss': 0.06050696301312126, 'temp': 1.0184592859089723, 'alpha_loss': 5.190228999009606, 'alpha': 0.31200274380675536, 'critic_loss': 1291.5270264385736, 'actor_loss': 106.81976057353772, 'time_step': 0.018417099763078298, 'td_error': 76.76185109988153, 'init_value': -213.7270050048828, 'ave_value': -91.77788828808781} step=13680
2022-04-20 18:56.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.38 [info     ] CQL_20220420185214: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.000340578151725189, 'time_algorithm_update': 0.01785708658876475, 'temp_loss': 0.12047578699407521, 'temp': 1.013896512009247, 'alpha_loss': 5.072265692621643, 'alpha': 0.3026370488935047, 'critic_loss': 1292.9340736433776, 'actor_loss': 107.0866131029631, 'time_step': 0.018298265529654877, 'td_error': 76.05537676933243, 'init_value': -215.98959350585938, 'ave_value': -93.56752791743726} step=14022
2022-04-20 18:56.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.45 [info     ] CQL_20220420185214: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033263365427652997, 'time_algorithm_update': 0.01783580389636302, 'temp_loss': 0.09115285692159196, 'temp': 1.0090279833615174, 'alpha_loss': 4.901910240190071, 'alpha': 0.29351273283623813, 'critic_loss': 1294.3767146952669, 'actor_loss': 107.47603825797812, 'time_step': 0.018266834013643322, 'td_error': 75.05614141618595, 'init_value': -215.9343719482422, 'ave_value': -94.45066676877741} step=14364
2022-04-20 18:56.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.51 [info     ] CQL_20220420185214: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003275250830845526, 'time_algorithm_update': 0.017841199685258474, 'temp_loss': 0.09223496645405801, 'temp': 1.0056912787476477, 'alpha_loss': 4.865228464728908, 'alpha': 0.2844939654507832, 'critic_loss': 1292.5790794327943, 'actor_loss': 107.62791773032026, 'time_step': 0.01826501798908613, 'td_error': 81.26682649291922, 'init_value': -214.1334686279297, 'ave_value': -94.87856946387538} step=14706
2022-04-20 18:56.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:56.58 [info     ] CQL_20220420185214: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000331987414443702, 'time_algorithm_update': 0.017718203583656, 'temp_loss': 0.06741214972207246, 'temp': 1.0019815417758204, 'alpha_loss': 4.691667901145087, 'alpha': 0.27579740805235525, 'critic_loss': 1290.9597412466305, 'actor_loss': 107.78328095820912, 'time_step': 0.018149160502249736, 'td_error': 73.52826306048767, 'init_value': -215.6545867919922, 'ave_value': -96.8927422576532} step=15048
2022-04-20 18:56.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.04 [info     ] CQL_20220420185214: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033153427971733943, 'time_algorithm_update': 0.017906927923012895, 'temp_loss': 0.09758435959290516, 'temp': 0.9984779584477519, 'alpha_loss': 4.534604390462239, 'alpha': 0.2674566612780443, 'critic_loss': 1291.052438479418, 'actor_loss': 107.9063332540947, 'time_step': 0.018339502881144918, 'td_error': 69.9592226590806, 'init_value': -211.03671264648438, 'ave_value': -94.60325169833595} step=15390
2022-04-20 18:57.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.11 [info     ] CQL_20220420185214: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003322216502407141, 'time_algorithm_update': 0.017901402467872665, 'temp_loss': 0.1321340871005379, 'temp': 0.994044325853649, 'alpha_loss': 4.379813487069649, 'alpha': 0.25931664647763236, 'critic_loss': 1292.4376734683387, 'actor_loss': 108.16946600752267, 'time_step': 0.018327015882347062, 'td_error': 66.94423730935405, 'init_value': -202.29249572753906, 'ave_value': -92.47421837026404} step=15732
2022-04-20 18:57.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.17 [info     ] CQL_20220420185214: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00033170925943475023, 'time_algorithm_update': 0.018014514655397648, 'temp_loss': 0.12665730990382315, 'temp': 0.9891915882540028, 'alpha_loss': 4.125708395277548, 'alpha': 0.25162022791759314, 'critic_loss': 1291.9264612030565, 'actor_loss': 108.37257302713672, 'time_step': 0.01844406267355757, 'td_error': 71.22755323337377, 'init_value': -206.07534790039062, 'ave_value': -94.85203778337554} step=16074
2022-04-20 18:57.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.24 [info     ] CQL_20220420185214: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003280263198049445, 'time_algorithm_update': 0.018991998761718035, 'temp_loss': 0.1505266301148729, 'temp': 0.9841064099679914, 'alpha_loss': 4.030362850741336, 'alpha': 0.2441603789726893, 'critic_loss': 1291.9133017021313, 'actor_loss': 108.51867756090667, 'time_step': 0.0194186230151974, 'td_error': 77.55537076026948, 'init_value': -200.7816619873047, 'ave_value': -93.40683746226266} step=16416
2022-04-20 18:57.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.31 [info     ] CQL_20220420185214: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033601613072623985, 'time_algorithm_update': 0.018030695747910885, 'temp_loss': 0.14773073342590654, 'temp': 0.9774392879845804, 'alpha_loss': 3.904728637801276, 'alpha': 0.23681773014410198, 'critic_loss': 1291.6879179658945, 'actor_loss': 108.63201223897656, 'time_step': 0.01846736634683888, 'td_error': 74.52502770825845, 'init_value': -202.00782775878906, 'ave_value': -94.81817384832581} step=16758
2022-04-20 18:57.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:57.37 [info     ] CQL_20220420185214: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00033597987994813083, 'time_algorithm_update': 0.017936875248513028, 'temp_loss': 0.1693712657382875, 'temp': 0.9714081104387317, 'alpha_loss': 3.7553786227577612, 'alpha': 0.22974427767664368, 'critic_loss': 1287.7875912314967, 'actor_loss': 108.52315550240857, 'time_step': 0.018375108813681797, 'td_error': 68.68350790758147, 'init_value': -200.13304138183594, 'ave_value': -95.31849496329959} step=17100
2022-04-20 18:57.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185214/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:57.38 [info     ] FQE_20220420185737: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015819503600338856, 'time_algorithm_update': 0.0020871420940720893, 'loss': 0.0075951760539403525, 'time_step': 0.0023149941340986505, 'init_value': -0.5576121211051941, 'ave_value': -0.5336676145928937, 'soft_opc': nan} step=166




2022-04-20 18:57.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.38 [info     ] FQE_20220420185737: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015533688556717103, 'time_algorithm_update': 0.0021011053797710373, 'loss': 0.005949463259080896, 'time_step': 0.002327025654804276, 'init_value': -0.6782310605049133, 'ave_value': -0.6126594988880931, 'soft_opc': nan} step=332




2022-04-20 18:57.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.39 [info     ] FQE_20220420185737: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.000154604394751859, 'time_algorithm_update': 0.002080918794654938, 'loss': 0.005384682659452489, 'time_step': 0.002303384872804205, 'init_value': -0.6964214444160461, 'ave_value': -0.6194024668284902, 'soft_opc': nan} step=498




2022-04-20 18:57.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.39 [info     ] FQE_20220420185737: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015506112431905357, 'time_algorithm_update': 0.002063370612730463, 'loss': 0.005255019954259855, 'time_step': 0.002287208315837814, 'init_value': -0.747897744178772, 'ave_value': -0.6459436465155434, 'soft_opc': nan} step=664




2022-04-20 18:57.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.40 [info     ] FQE_20220420185737: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015388195773205124, 'time_algorithm_update': 0.0021006342876388365, 'loss': 0.005060780158053231, 'time_step': 0.002322620656116899, 'init_value': -0.8094295263290405, 'ave_value': -0.6853975375761857, 'soft_opc': nan} step=830




2022-04-20 18:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.40 [info     ] FQE_20220420185737: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015595734837543532, 'time_algorithm_update': 0.002102741275925234, 'loss': 0.00482791579322867, 'time_step': 0.0023255994520991682, 'init_value': -0.8032512664794922, 'ave_value': -0.6585682342852558, 'soft_opc': nan} step=996




2022-04-20 18:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.40 [info     ] FQE_20220420185737: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015432863350374153, 'time_algorithm_update': 0.002076026905013854, 'loss': 0.00469248726296362, 'time_step': 0.002302067825593144, 'init_value': -0.8415704369544983, 'ave_value': -0.660028283136922, 'soft_opc': nan} step=1162




2022-04-20 18:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.41 [info     ] FQE_20220420185737: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015500511031552968, 'time_algorithm_update': 0.0020166592425610646, 'loss': 0.004536503855789821, 'time_step': 0.002240270017141319, 'init_value': -0.8808188438415527, 'ave_value': -0.6907649184535215, 'soft_opc': nan} step=1328




2022-04-20 18:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.41 [info     ] FQE_20220420185737: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001538517963455384, 'time_algorithm_update': 0.002082184136631977, 'loss': 0.0042874074095007346, 'time_step': 0.0023022602839642262, 'init_value': -0.9110926389694214, 'ave_value': -0.6897842506299148, 'soft_opc': nan} step=1494




2022-04-20 18:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.42 [info     ] FQE_20220420185737: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015513580965708537, 'time_algorithm_update': 0.0021094327949615845, 'loss': 0.004170543260322547, 'time_step': 0.0023357437317629895, 'init_value': -0.9840846061706543, 'ave_value': -0.7247672223158785, 'soft_opc': nan} step=1660




2022-04-20 18:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.42 [info     ] FQE_20220420185737: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015847654227750847, 'time_algorithm_update': 0.002093210277787174, 'loss': 0.004093345638269177, 'time_step': 0.0023209028933421673, 'init_value': -1.0623242855072021, 'ave_value': -0.7611697427659958, 'soft_opc': nan} step=1826




2022-04-20 18:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.43 [info     ] FQE_20220420185737: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015380583613751884, 'time_algorithm_update': 0.0021162420870309852, 'loss': 0.004144493817261992, 'time_step': 0.0023380446146769695, 'init_value': -1.0758577585220337, 'ave_value': -0.7564356525739034, 'soft_opc': nan} step=1992




2022-04-20 18:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.43 [info     ] FQE_20220420185737: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015533688556717103, 'time_algorithm_update': 0.002092255167214267, 'loss': 0.004245667914012784, 'time_step': 0.002315549965364387, 'init_value': -1.1508017778396606, 'ave_value': -0.8019514965648586, 'soft_opc': nan} step=2158




2022-04-20 18:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.43 [info     ] FQE_20220420185737: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.000159196106784315, 'time_algorithm_update': 0.0021228976996548205, 'loss': 0.004184142006168434, 'time_step': 0.0023522980241890415, 'init_value': -1.228009819984436, 'ave_value': -0.8226496981742146, 'soft_opc': nan} step=2324




2022-04-20 18:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.44 [info     ] FQE_20220420185737: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015767511115016708, 'time_algorithm_update': 0.0020881431648530156, 'loss': 0.004291893428343023, 'time_step': 0.002315492515104363, 'init_value': -1.2799491882324219, 'ave_value': -0.8487076636235993, 'soft_opc': nan} step=2490




2022-04-20 18:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.44 [info     ] FQE_20220420185737: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015469057014189572, 'time_algorithm_update': 0.002119862889669028, 'loss': 0.004538264110507668, 'time_step': 0.002344352653227657, 'init_value': -1.3269157409667969, 'ave_value': -0.8637561090491913, 'soft_opc': nan} step=2656




2022-04-20 18:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.45 [info     ] FQE_20220420185737: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001559185694499188, 'time_algorithm_update': 0.0021203153104667203, 'loss': 0.004777839417844248, 'time_step': 0.002344233443938106, 'init_value': -1.4376049041748047, 'ave_value': -0.9184855056238604, 'soft_opc': nan} step=2822




2022-04-20 18:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.45 [info     ] FQE_20220420185737: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015989412744361233, 'time_algorithm_update': 0.002132359757480851, 'loss': 0.005298967004041131, 'time_step': 0.002364231879452625, 'init_value': -1.479723572731018, 'ave_value': -0.9357886096714316, 'soft_opc': nan} step=2988




2022-04-20 18:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.46 [info     ] FQE_20220420185737: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.000153757003416498, 'time_algorithm_update': 0.0020974055830254613, 'loss': 0.0057026656331909615, 'time_step': 0.0023226292736559027, 'init_value': -1.5510700941085815, 'ave_value': -0.9706144901009294, 'soft_opc': nan} step=3154




2022-04-20 18:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.46 [info     ] FQE_20220420185737: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001599372151386307, 'time_algorithm_update': 0.0020977675196636155, 'loss': 0.005960065130907369, 'time_step': 0.0023274378604199513, 'init_value': -1.6153202056884766, 'ave_value': -0.9928596488862961, 'soft_opc': nan} step=3320




2022-04-20 18:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.47 [info     ] FQE_20220420185737: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015774405146219643, 'time_algorithm_update': 0.0021079319069184452, 'loss': 0.00666720216745982, 'time_step': 0.0023318629666983365, 'init_value': -1.6775472164154053, 'ave_value': -1.058697642285276, 'soft_opc': nan} step=3486




2022-04-20 18:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.47 [info     ] FQE_20220420185737: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000158348715448954, 'time_algorithm_update': 0.0020972562123493976, 'loss': 0.007251346009048487, 'time_step': 0.0023236174181283237, 'init_value': -1.76558256149292, 'ave_value': -1.1255640582995372, 'soft_opc': nan} step=3652




2022-04-20 18:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.47 [info     ] FQE_20220420185737: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015307909034820925, 'time_algorithm_update': 0.002066632351243352, 'loss': 0.007439395902308086, 'time_step': 0.0022896714957363635, 'init_value': -1.8440841436386108, 'ave_value': -1.1839721532689558, 'soft_opc': nan} step=3818




2022-04-20 18:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.48 [info     ] FQE_20220420185737: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015523778386862882, 'time_algorithm_update': 0.002109471573887101, 'loss': 0.0075386090797417045, 'time_step': 0.0023364245173442795, 'init_value': -1.866796851158142, 'ave_value': -1.1760064413888498, 'soft_opc': nan} step=3984




2022-04-20 18:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.48 [info     ] FQE_20220420185737: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015615698802902038, 'time_algorithm_update': 0.0020976195852440522, 'loss': 0.008591241889117083, 'time_step': 0.002327557069709502, 'init_value': -1.9237887859344482, 'ave_value': -1.2217864738726938, 'soft_opc': nan} step=4150




2022-04-20 18:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.49 [info     ] FQE_20220420185737: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015406867107713078, 'time_algorithm_update': 0.0020947772336293415, 'loss': 0.008968775628375855, 'time_step': 0.0023205696818340256, 'init_value': -1.9715895652770996, 'ave_value': -1.2615389038447862, 'soft_opc': nan} step=4316




2022-04-20 18:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.49 [info     ] FQE_20220420185737: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015535842941468022, 'time_algorithm_update': 0.0020715903086834645, 'loss': 0.009268628096022835, 'time_step': 0.002296203590301146, 'init_value': -1.9936254024505615, 'ave_value': -1.2742040793772216, 'soft_opc': nan} step=4482




2022-04-20 18:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.50 [info     ] FQE_20220420185737: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015716236757944865, 'time_algorithm_update': 0.002076176275689918, 'loss': 0.010380750583030328, 'time_step': 0.002300124570547816, 'init_value': -2.080357551574707, 'ave_value': -1.3396333506515434, 'soft_opc': nan} step=4648




2022-04-20 18:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.50 [info     ] FQE_20220420185737: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015540869839220163, 'time_algorithm_update': 0.00208050084401326, 'loss': 0.010497517002948436, 'time_step': 0.002299038760633354, 'init_value': -2.1572656631469727, 'ave_value': -1.3904445508176142, 'soft_opc': nan} step=4814




2022-04-20 18:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.50 [info     ] FQE_20220420185737: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001604700663003577, 'time_algorithm_update': 0.002144592354096562, 'loss': 0.01105941543622077, 'time_step': 0.002375134502548769, 'init_value': -2.181757926940918, 'ave_value': -1.39666623369799, 'soft_opc': nan} step=4980




2022-04-20 18:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.51 [info     ] FQE_20220420185737: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001531595207122435, 'time_algorithm_update': 0.0020774875778749764, 'loss': 0.011866448384692824, 'time_step': 0.0023027055234794156, 'init_value': -2.2632689476013184, 'ave_value': -1.4789279906062391, 'soft_opc': nan} step=5146




2022-04-20 18:57.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.51 [info     ] FQE_20220420185737: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015683346484080855, 'time_algorithm_update': 0.002079460994306817, 'loss': 0.011890613591068435, 'time_step': 0.002303413597934217, 'init_value': -2.322155475616455, 'ave_value': -1.5219344693798202, 'soft_opc': nan} step=5312




2022-04-20 18:57.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.52 [info     ] FQE_20220420185737: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015941298151590737, 'time_algorithm_update': 0.002109990062483822, 'loss': 0.012905968646995768, 'time_step': 0.0023399691983877896, 'init_value': -2.4281160831451416, 'ave_value': -1.6216471381821074, 'soft_opc': nan} step=5478




2022-04-20 18:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.52 [info     ] FQE_20220420185737: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001532313335372741, 'time_algorithm_update': 0.0020109429416886294, 'loss': 0.013797444480061755, 'time_step': 0.0022346671805324324, 'init_value': -2.42755126953125, 'ave_value': -1.6165782939743352, 'soft_opc': nan} step=5644




2022-04-20 18:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.53 [info     ] FQE_20220420185737: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015632646629609257, 'time_algorithm_update': 0.002041170396000506, 'loss': 0.014252214503884092, 'time_step': 0.002267547400600939, 'init_value': -2.5563807487487793, 'ave_value': -1.7201607375934318, 'soft_opc': nan} step=5810




2022-04-20 18:57.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.53 [info     ] FQE_20220420185737: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015361625027943807, 'time_algorithm_update': 0.0020727824015789723, 'loss': 0.015108420918218747, 'time_step': 0.0022936958864510776, 'init_value': -2.522798776626587, 'ave_value': -1.6750796397661303, 'soft_opc': nan} step=5976




2022-04-20 18:57.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.53 [info     ] FQE_20220420185737: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015199471669024732, 'time_algorithm_update': 0.0020233421440584115, 'loss': 0.015442769700964263, 'time_step': 0.002243294773331608, 'init_value': -2.503108263015747, 'ave_value': -1.6424209305966222, 'soft_opc': nan} step=6142




2022-04-20 18:57.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.54 [info     ] FQE_20220420185737: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015407872487263507, 'time_algorithm_update': 0.0020564751452710256, 'loss': 0.016095548685842072, 'time_step': 0.0022796334990535876, 'init_value': -2.5837464332580566, 'ave_value': -1.6952336265428647, 'soft_opc': nan} step=6308




2022-04-20 18:57.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.54 [info     ] FQE_20220420185737: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015945606921092574, 'time_algorithm_update': 0.0021306491759886226, 'loss': 0.016641884272166313, 'time_step': 0.002358940710504371, 'init_value': -2.6096582412719727, 'ave_value': -1.7124578272302946, 'soft_opc': nan} step=6474




2022-04-20 18:57.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.55 [info     ] FQE_20220420185737: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001567070742687547, 'time_algorithm_update': 0.002108188996832055, 'loss': 0.01774216757902135, 'time_step': 0.0023354335003588573, 'init_value': -2.624635696411133, 'ave_value': -1.7401000281913324, 'soft_opc': nan} step=6640




2022-04-20 18:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.55 [info     ] FQE_20220420185737: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015761335212064077, 'time_algorithm_update': 0.002101343798350139, 'loss': 0.01860126270071031, 'time_step': 0.002330073391098574, 'init_value': -2.6699910163879395, 'ave_value': -1.7690043371002953, 'soft_opc': nan} step=6806




2022-04-20 18:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.56 [info     ] FQE_20220420185737: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015707331967641073, 'time_algorithm_update': 0.0020944655659687087, 'loss': 0.019253481574306064, 'time_step': 0.0023183248129235693, 'init_value': -2.728513240814209, 'ave_value': -1.8185605248620917, 'soft_opc': nan} step=6972




2022-04-20 18:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.56 [info     ] FQE_20220420185737: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015263385083301957, 'time_algorithm_update': 0.0020794710481023214, 'loss': 0.020560742813128842, 'time_step': 0.002299881843199213, 'init_value': -2.807136058807373, 'ave_value': -1.884409374602743, 'soft_opc': nan} step=7138




2022-04-20 18:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.56 [info     ] FQE_20220420185737: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001603005880332855, 'time_algorithm_update': 0.002108594021165227, 'loss': 0.021593392621838004, 'time_step': 0.002337148390620588, 'init_value': -2.905679225921631, 'ave_value': -1.9764904822180938, 'soft_opc': nan} step=7304




2022-04-20 18:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.57 [info     ] FQE_20220420185737: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015246868133544922, 'time_algorithm_update': 0.002077389912432935, 'loss': 0.021988550454927945, 'time_step': 0.002297102686870529, 'init_value': -2.896787166595459, 'ave_value': -1.9594587806108836, 'soft_opc': nan} step=7470




2022-04-20 18:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.57 [info     ] FQE_20220420185737: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015263385083301957, 'time_algorithm_update': 0.0020253069429512485, 'loss': 0.022633557906374335, 'time_step': 0.0022477313696619974, 'init_value': -2.8654189109802246, 'ave_value': -1.9358463421061232, 'soft_opc': nan} step=7636




2022-04-20 18:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.58 [info     ] FQE_20220420185737: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015776703156620623, 'time_algorithm_update': 0.0021371381828583867, 'loss': 0.02279804158234879, 'time_step': 0.0023654957851731635, 'init_value': -2.9437601566314697, 'ave_value': -2.01608074116143, 'soft_opc': nan} step=7802




2022-04-20 18:57.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.58 [info     ] FQE_20220420185737: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015318824584225574, 'time_algorithm_update': 0.002076411821756018, 'loss': 0.02462876529968624, 'time_step': 0.0022994452212230267, 'init_value': -2.9612374305725098, 'ave_value': -2.0499039772595906, 'soft_opc': nan} step=7968




2022-04-20 18:57.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.59 [info     ] FQE_20220420185737: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015831568154943995, 'time_algorithm_update': 0.002114718218883836, 'loss': 0.026290856761835426, 'time_step': 0.002344940082136407, 'init_value': -3.0505316257476807, 'ave_value': -2.1046625289831074, 'soft_opc': nan} step=8134




2022-04-20 18:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 18:57.59 [info     ] FQE_20220420185737: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001519817903817418, 'time_algorithm_update': 0.0020785202462989165, 'loss': 0.027716936931401176, 'time_step': 0.00229921829269593, 'init_value': -3.10345458984375, 'ave_value': -2.148379487275809, 'soft_opc': nan} step=8300




2022-04-20 18:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185737/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 18:57.59 [info     ] Directory is created at d3rlpy_logs/FQE_20220420185759
2022-04-20 18:57.59 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:57.59 [debug    ] Building models...
2022-04-20 18:57.59 [debug    ] Models have been built.
2022-04-20 18:57.59 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420185759/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 18:58.00 [info     ] FQE_20220420185759: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015849706738494163, 'time_algorithm_update': 0.002133798460627711, 'loss': 0.0216666100476327, 'time_step': 0.0023622013801752133, 'init_value': -1.2050707340240479, 'ave_value': -1.201447406182955, 'soft_opc': nan} step=344




2022-04-20 18:58.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.01 [info     ] FQE_20220420185759: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015133896539377612, 'time_algorithm_update': 0.00202176806538604, 'loss': 0.021041371207142813, 'time_step': 0.0022395710612452308, 'init_value': -1.9802038669586182, 'ave_value': -1.9534978509486258, 'soft_opc': nan} step=688




2022-04-20 18:58.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.02 [info     ] FQE_20220420185759: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001548057378724564, 'time_algorithm_update': 0.0020989221195841946, 'loss': 0.025447113650612706, 'time_step': 0.002322983603144801, 'init_value': -2.9172725677490234, 'ave_value': -2.869222761301307, 'soft_opc': nan} step=1032




2022-04-20 18:58.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.03 [info     ] FQE_20220420185759: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015319017476813737, 'time_algorithm_update': 0.002046097156613372, 'loss': 0.026847715107832362, 'time_step': 0.002264283424200014, 'init_value': -3.5751571655273438, 'ave_value': -3.510111238035533, 'soft_opc': nan} step=1376




2022-04-20 18:58.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.04 [info     ] FQE_20220420185759: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015494989794354107, 'time_algorithm_update': 0.002081942419673121, 'loss': 0.033203880130692365, 'time_step': 0.0023047986418701883, 'init_value': -4.4754109382629395, 'ave_value': -4.362760655675922, 'soft_opc': nan} step=1720




2022-04-20 18:58.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.05 [info     ] FQE_20220420185759: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015642407328583474, 'time_algorithm_update': 0.0020388413307278657, 'loss': 0.0376056567576259, 'time_step': 0.0022642473841822425, 'init_value': -4.958654880523682, 'ave_value': -4.838374617427319, 'soft_opc': nan} step=2064




2022-04-20 18:58.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.05 [info     ] FQE_20220420185759: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015477177708647972, 'time_algorithm_update': 0.002063633397568104, 'loss': 0.04426533852021621, 'time_step': 0.0022883539976075637, 'init_value': -5.6666059494018555, 'ave_value': -5.524378406169178, 'soft_opc': nan} step=2408




2022-04-20 18:58.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.06 [info     ] FQE_20220420185759: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015558752902718477, 'time_algorithm_update': 0.0020469122154768123, 'loss': 0.050619134443898706, 'time_step': 0.002271255781484205, 'init_value': -6.155998229980469, 'ave_value': -5.977339733224194, 'soft_opc': nan} step=2752




2022-04-20 18:58.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.07 [info     ] FQE_20220420185759: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015508574108744777, 'time_algorithm_update': 0.0020649772743846096, 'loss': 0.05781559770666929, 'time_step': 0.0022902038208273954, 'init_value': -6.60678768157959, 'ave_value': -6.409849404671171, 'soft_opc': nan} step=3096




2022-04-20 18:58.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.08 [info     ] FQE_20220420185759: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015113797298697539, 'time_algorithm_update': 0.002023049565248711, 'loss': 0.06714822389293722, 'time_step': 0.0022460963836936063, 'init_value': -7.216403961181641, 'ave_value': -7.038047858280642, 'soft_opc': nan} step=3440




2022-04-20 18:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.09 [info     ] FQE_20220420185759: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015198352725006813, 'time_algorithm_update': 0.0020430351412573525, 'loss': 0.07554477211466969, 'time_step': 0.0022645405558652655, 'init_value': -7.60357666015625, 'ave_value': -7.450941625183767, 'soft_opc': nan} step=3784




2022-04-20 18:58.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.10 [info     ] FQE_20220420185759: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015659387721571812, 'time_algorithm_update': 0.0020756541296493175, 'loss': 0.08620109595087638, 'time_step': 0.0023035012012304263, 'init_value': -8.162847518920898, 'ave_value': -8.085551175166358, 'soft_opc': nan} step=4128




2022-04-20 18:58.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.11 [info     ] FQE_20220420185759: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001542692960694779, 'time_algorithm_update': 0.0020655157954193827, 'loss': 0.09482878007991023, 'time_step': 0.002292109783305678, 'init_value': -8.402913093566895, 'ave_value': -8.413046987690366, 'soft_opc': nan} step=4472




2022-04-20 18:58.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.11 [info     ] FQE_20220420185759: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015394424283227256, 'time_algorithm_update': 0.0020290446835894917, 'loss': 0.10270361434993182, 'time_step': 0.002249570086944935, 'init_value': -8.830273628234863, 'ave_value': -8.971456681742325, 'soft_opc': nan} step=4816




2022-04-20 18:58.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.12 [info     ] FQE_20220420185759: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015578782835672068, 'time_algorithm_update': 0.002035387033639952, 'loss': 0.11124775674042486, 'time_step': 0.0022607875424762104, 'init_value': -9.154329299926758, 'ave_value': -9.407947131011401, 'soft_opc': nan} step=5160




2022-04-20 18:58.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.13 [info     ] FQE_20220420185759: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015400661978610727, 'time_algorithm_update': 0.0020746533260789027, 'loss': 0.12138181280474676, 'time_step': 0.00229993601178014, 'init_value': -9.507247924804688, 'ave_value': -9.891459222070806, 'soft_opc': nan} step=5504




2022-04-20 18:58.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.14 [info     ] FQE_20220420185759: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015272927838702534, 'time_algorithm_update': 0.0020838885806327644, 'loss': 0.12931976457895322, 'time_step': 0.002305084882780563, 'init_value': -9.735248565673828, 'ave_value': -10.278107758416786, 'soft_opc': nan} step=5848




2022-04-20 18:58.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.15 [info     ] FQE_20220420185759: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015261006909747456, 'time_algorithm_update': 0.0020653321299442026, 'loss': 0.14160947463485998, 'time_step': 0.0022857715917188066, 'init_value': -9.967613220214844, 'ave_value': -10.624102978717099, 'soft_opc': nan} step=6192




2022-04-20 18:58.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.16 [info     ] FQE_20220420185759: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015746923380119857, 'time_algorithm_update': 0.002091890157655228, 'loss': 0.15161771608549063, 'time_step': 0.002321207246115041, 'init_value': -10.168554306030273, 'ave_value': -11.040351077417533, 'soft_opc': nan} step=6536




2022-04-20 18:58.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.17 [info     ] FQE_20220420185759: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001519280810688817, 'time_algorithm_update': 0.002046060423518336, 'loss': 0.16258934160781113, 'time_step': 0.0022681792115056237, 'init_value': -10.404342651367188, 'ave_value': -11.436134965247936, 'soft_opc': nan} step=6880




2022-04-20 18:58.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.17 [info     ] FQE_20220420185759: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015948262325553007, 'time_algorithm_update': 0.002079100109810053, 'loss': 0.1752717485813814, 'time_step': 0.0023089903731678806, 'init_value': -10.65444564819336, 'ave_value': -11.769318202527257, 'soft_opc': nan} step=7224




2022-04-20 18:58.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.18 [info     ] FQE_20220420185759: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015704576359238734, 'time_algorithm_update': 0.0020826923292736675, 'loss': 0.18686606149665666, 'time_step': 0.0023099495921024057, 'init_value': -10.949845314025879, 'ave_value': -12.183962464480249, 'soft_opc': nan} step=7568




2022-04-20 18:58.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.19 [info     ] FQE_20220420185759: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015714695287305256, 'time_algorithm_update': 0.002117123714713163, 'loss': 0.2015137378299652, 'time_step': 0.0023471581381420757, 'init_value': -11.25925350189209, 'ave_value': -12.576855741132487, 'soft_opc': nan} step=7912




2022-04-20 18:58.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.20 [info     ] FQE_20220420185759: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016052015992098077, 'time_algorithm_update': 0.002104173566019812, 'loss': 0.21949265554525652, 'time_step': 0.002337018417757611, 'init_value': -11.74540901184082, 'ave_value': -13.227962981580614, 'soft_opc': nan} step=8256




2022-04-20 18:58.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.21 [info     ] FQE_20220420185759: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.000158919844516488, 'time_algorithm_update': 0.002121994661730389, 'loss': 0.23588897944644613, 'time_step': 0.002353038898734159, 'init_value': -11.820207595825195, 'ave_value': -13.44025263960834, 'soft_opc': nan} step=8600




2022-04-20 18:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.22 [info     ] FQE_20220420185759: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001596995564394219, 'time_algorithm_update': 0.0021196922590566237, 'loss': 0.24323442938391032, 'time_step': 0.002351101747778959, 'init_value': -12.03412914276123, 'ave_value': -13.821815783590884, 'soft_opc': nan} step=8944




2022-04-20 18:58.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.23 [info     ] FQE_20220420185759: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015880340753599654, 'time_algorithm_update': 0.002117178467817085, 'loss': 0.2628947378320316, 'time_step': 0.00234734388284905, 'init_value': -12.20453929901123, 'ave_value': -14.065813106499814, 'soft_opc': nan} step=9288




2022-04-20 18:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.24 [info     ] FQE_20220420185759: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015981391418811886, 'time_algorithm_update': 0.0021076368731121685, 'loss': 0.2732636693670133, 'time_step': 0.0023366767306660496, 'init_value': -12.582085609436035, 'ave_value': -14.466308458458196, 'soft_opc': nan} step=9632




2022-04-20 18:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.25 [info     ] FQE_20220420185759: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015747061995572821, 'time_algorithm_update': 0.0021372784015744233, 'loss': 0.2867217502654205, 'time_step': 0.0023660895436309103, 'init_value': -12.615431785583496, 'ave_value': -14.625581258146076, 'soft_opc': nan} step=9976




2022-04-20 18:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.25 [info     ] FQE_20220420185759: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015708041745562886, 'time_algorithm_update': 0.00209914390430894, 'loss': 0.28823798550238705, 'time_step': 0.002326999292817227, 'init_value': -12.783209800720215, 'ave_value': -14.969003477155626, 'soft_opc': nan} step=10320




2022-04-20 18:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.26 [info     ] FQE_20220420185759: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016176700592041016, 'time_algorithm_update': 0.0021261676799419313, 'loss': 0.3107689992513854, 'time_step': 0.002361583155254985, 'init_value': -12.99277114868164, 'ave_value': -15.122762121798823, 'soft_opc': nan} step=10664




2022-04-20 18:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.27 [info     ] FQE_20220420185759: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001602748105692309, 'time_algorithm_update': 0.0021441675895868344, 'loss': 0.3219933129635878, 'time_step': 0.002375781536102295, 'init_value': -13.323724746704102, 'ave_value': -15.433921329531113, 'soft_opc': nan} step=11008




2022-04-20 18:58.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.28 [info     ] FQE_20220420185759: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015790587247804154, 'time_algorithm_update': 0.002141700927601304, 'loss': 0.33634229642230756, 'time_step': 0.002369975627854813, 'init_value': -13.558134078979492, 'ave_value': -15.725502068445355, 'soft_opc': nan} step=11352




2022-04-20 18:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.29 [info     ] FQE_20220420185759: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015506217646044353, 'time_algorithm_update': 0.0020843044269916624, 'loss': 0.34663440467421575, 'time_step': 0.0023116808991099514, 'init_value': -13.916717529296875, 'ave_value': -16.136147862001582, 'soft_opc': nan} step=11696




2022-04-20 18:58.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.30 [info     ] FQE_20220420185759: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015787121861480003, 'time_algorithm_update': 0.002081556375636611, 'loss': 0.3602056144141094, 'time_step': 0.002313399037649465, 'init_value': -14.299365997314453, 'ave_value': -16.548379133896784, 'soft_opc': nan} step=12040




2022-04-20 18:58.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.31 [info     ] FQE_20220420185759: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015670476957809095, 'time_algorithm_update': 0.002131731011146723, 'loss': 0.3701552621543754, 'time_step': 0.002360321754632994, 'init_value': -14.108423233032227, 'ave_value': -16.352999497426524, 'soft_opc': nan} step=12384




2022-04-20 18:58.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.32 [info     ] FQE_20220420185759: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015747061995572821, 'time_algorithm_update': 0.0021134219890417056, 'loss': 0.37552174204029143, 'time_step': 0.002344532761462899, 'init_value': -14.66016960144043, 'ave_value': -16.827102478157293, 'soft_opc': nan} step=12728




2022-04-20 18:58.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.32 [info     ] FQE_20220420185759: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016375890997953193, 'time_algorithm_update': 0.0021336792513381602, 'loss': 0.3877600838191981, 'time_step': 0.0023687412572461503, 'init_value': -14.961400985717773, 'ave_value': -17.266567844259846, 'soft_opc': nan} step=13072




2022-04-20 18:58.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.33 [info     ] FQE_20220420185759: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015696051508881325, 'time_algorithm_update': 0.002115769441737685, 'loss': 0.40221177242414724, 'time_step': 0.0023447933585144754, 'init_value': -15.279767036437988, 'ave_value': -17.64867812164195, 'soft_opc': nan} step=13416




2022-04-20 18:58.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.34 [info     ] FQE_20220420185759: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015647189561710802, 'time_algorithm_update': 0.0020872035691904467, 'loss': 0.4017014878390487, 'time_step': 0.0023163723391155864, 'init_value': -15.767114639282227, 'ave_value': -18.124604697834265, 'soft_opc': nan} step=13760




2022-04-20 18:58.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.35 [info     ] FQE_20220420185759: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016139898189278536, 'time_algorithm_update': 0.0021151706229808717, 'loss': 0.4111125388745825, 'time_step': 0.0023494245007980702, 'init_value': -16.16024398803711, 'ave_value': -18.658100608125462, 'soft_opc': nan} step=14104




2022-04-20 18:58.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.36 [info     ] FQE_20220420185759: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015988252883733704, 'time_algorithm_update': 0.0020877878333246985, 'loss': 0.4202816161262088, 'time_step': 0.002318801574928816, 'init_value': -16.01578712463379, 'ave_value': -18.65553696971756, 'soft_opc': nan} step=14448




2022-04-20 18:58.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.37 [info     ] FQE_20220420185759: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015804171562194824, 'time_algorithm_update': 0.0021145974480828575, 'loss': 0.42320159466146645, 'time_step': 0.0023474457652069803, 'init_value': -16.226531982421875, 'ave_value': -18.95208940189164, 'soft_opc': nan} step=14792




2022-04-20 18:58.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.38 [info     ] FQE_20220420185759: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015370721040770065, 'time_algorithm_update': 0.0020461318104766134, 'loss': 0.43205853526177274, 'time_step': 0.002272094404974649, 'init_value': -16.031166076660156, 'ave_value': -18.988637091044907, 'soft_opc': nan} step=15136




2022-04-20 18:58.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.39 [info     ] FQE_20220420185759: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001525095728940742, 'time_algorithm_update': 0.0019776883513428446, 'loss': 0.4493154159552136, 'time_step': 0.002200124568717424, 'init_value': -16.373153686523438, 'ave_value': -19.27309330352792, 'soft_opc': nan} step=15480




2022-04-20 18:58.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.39 [info     ] FQE_20220420185759: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015138886695684387, 'time_algorithm_update': 0.0020230890706528066, 'loss': 0.4510540762336838, 'time_step': 0.002247149168058883, 'init_value': -16.762928009033203, 'ave_value': -19.807860664580318, 'soft_opc': nan} step=15824




2022-04-20 18:58.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.40 [info     ] FQE_20220420185759: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001552548519400663, 'time_algorithm_update': 0.002056076776149661, 'loss': 0.44950204571373326, 'time_step': 0.002283946719280509, 'init_value': -16.814311981201172, 'ave_value': -19.958581738455877, 'soft_opc': nan} step=16168




2022-04-20 18:58.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.41 [info     ] FQE_20220420185759: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015474405399588652, 'time_algorithm_update': 0.0020838996698690016, 'loss': 0.4473141478428747, 'time_step': 0.002309695232746213, 'init_value': -17.085468292236328, 'ave_value': -20.19508298179051, 'soft_opc': nan} step=16512




2022-04-20 18:58.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.42 [info     ] FQE_20220420185759: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015351661415987237, 'time_algorithm_update': 0.0020240676957507466, 'loss': 0.4468577864233318, 'time_step': 0.0022478505622508913, 'init_value': -17.315216064453125, 'ave_value': -20.27954933189594, 'soft_opc': nan} step=16856




2022-04-20 18:58.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 18:58.43 [info     ] FQE_20220420185759: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015455345774805822, 'time_algorithm_update': 0.0020677814649981124, 'loss': 0.458244462860315, 'time_step': 0.002291197000547897, 'init_value': -17.393512725830078, 'ave_value': -20.378696538977795, 'soft_opc': nan} step=17200




2022-04-20 18:58.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420185759/model_17200.pt
search iteration:  23
using hyper params:  [0.008938456789626554, 0.006740063252670138, 9.306144193649559e-05, 5]
2022-04-20 18:58.43 [debug    ] RoundIterator is selected.
2022-04-20 18:58.43 [info     ] Directory is created at d3rlpy_logs/CQL_20220420185843
2022-04-20 18:58.43 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 18:58.43 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 18:58.43 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420185843/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.008938456789626554, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:58.49 [info     ] CQL_20220420185843: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003476114998086851, 'time_algorithm_update': 0.01762643613313374, 'temp_loss': 4.3562915443676955, 'temp': 0.9875856479357558, 'alpha_loss': -14.709481891832853, 'alpha': 1.0163632585988407, 'critic_loss': 34.042891265356054, 'actor_loss': 3.8510146934038016, 'time_step': 0.01807097175665069, 'td_error': 8.310292748964379, 'init_value': -11.320867538452148, 'ave_value': -6.707830739217466} step=342
2022-04-20 18:58.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:58.56 [info     ] CQL_20220420185843: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000350949359916107, 'time_algorithm_update': 0.017559672656812166, 'temp_loss': 3.8684477624837417, 'temp': 0.9588441008712814, 'alpha_loss': -6.936784330167268, 'alpha': 1.0398424654676204, 'critic_loss': 22.45026880119279, 'actor_loss': 8.681313900919685, 'time_step': 0.018011419396651417, 'td_error': 5.5295635339111175, 'init_value': -18.16534423828125, 'ave_value': -10.698719212150385} step=684
2022-04-20 18:58.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.02 [info     ] CQL_20220420185843: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003506063717847679, 'time_algorithm_update': 0.017708741433439198, 'temp_loss': 3.027738014856974, 'temp': 0.9344034911247722, 'alpha_loss': -3.075545316374158, 'alpha': 1.0558987974423415, 'critic_loss': 36.12006926954838, 'actor_loss': 14.606936451984428, 'time_step': 0.01815471663112529, 'td_error': 7.177397904769762, 'init_value': -25.669666290283203, 'ave_value': -14.998875063623192} step=1026
2022-04-20 18:59.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.09 [info     ] CQL_20220420185843: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003480918226186295, 'time_algorithm_update': 0.0177699745747081, 'temp_loss': 2.408800524926325, 'temp': 0.9129670138596094, 'alpha_loss': 0.3556218021984991, 'alpha': 1.061371261961976, 'critic_loss': 58.44282923246685, 'actor_loss': 20.708640157130727, 'time_step': 0.0182160466735126, 'td_error': 11.145882764970708, 'init_value': -34.28963088989258, 'ave_value': -20.146013288042834} step=1368
2022-04-20 18:59.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.15 [info     ] CQL_20220420185843: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003448180985032466, 'time_algorithm_update': 0.017550154039037157, 'temp_loss': 1.920340573229985, 'temp': 0.8941307052185661, 'alpha_loss': 3.2306647453653183, 'alpha': 1.0510875363098948, 'critic_loss': 87.22219961289076, 'actor_loss': 26.57481084790146, 'time_step': 0.017990668614705402, 'td_error': 14.985695953798254, 'init_value': -42.798736572265625, 'ave_value': -24.744205852424777} step=1710
2022-04-20 18:59.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.21 [info     ] CQL_20220420185843: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003443991231639483, 'time_algorithm_update': 0.01754816930893569, 'temp_loss': 1.5248549554431647, 'temp': 0.8771624720236014, 'alpha_loss': 5.588994452827855, 'alpha': 1.024178465555983, 'critic_loss': 119.94806269595497, 'actor_loss': 32.22377827292994, 'time_step': 0.017985057412532337, 'td_error': 19.262170639820308, 'init_value': -51.29547119140625, 'ave_value': -30.299697426800524} step=2052
2022-04-20 18:59.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.28 [info     ] CQL_20220420185843: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003421487863997967, 'time_algorithm_update': 0.01761571008559556, 'temp_loss': 1.268474480039195, 'temp': 0.8618785092356609, 'alpha_loss': 6.916789516370896, 'alpha': 0.9863823871863516, 'critic_loss': 157.6414646572537, 'actor_loss': 37.65756628806131, 'time_step': 0.018051754661470826, 'td_error': 29.166468261246305, 'init_value': -59.94489669799805, 'ave_value': -35.377968999650165} step=2394
2022-04-20 18:59.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.34 [info     ] CQL_20220420185843: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034572994499875787, 'time_algorithm_update': 0.017463236524347673, 'temp_loss': 1.0206972506136922, 'temp': 0.8475318591148533, 'alpha_loss': 8.041166107556974, 'alpha': 0.9472529888153076, 'critic_loss': 196.07286013776098, 'actor_loss': 42.73240656044051, 'time_step': 0.017902281549241807, 'td_error': 30.407275626965294, 'init_value': -65.47502136230469, 'ave_value': -38.08273756958947} step=2736
2022-04-20 18:59.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.40 [info     ] CQL_20220420185843: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034295397195202565, 'time_algorithm_update': 0.017391404213263975, 'temp_loss': 0.7903109047213319, 'temp': 0.8348607370379375, 'alpha_loss': 8.790793298977858, 'alpha': 0.9084608354415112, 'critic_loss': 234.09760574430052, 'actor_loss': 47.66093364514803, 'time_step': 0.017829344286556133, 'td_error': 34.63703644453831, 'init_value': -74.2059097290039, 'ave_value': -42.723136215838224} step=3078
2022-04-20 18:59.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.47 [info     ] CQL_20220420185843: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00034836579484549184, 'time_algorithm_update': 0.017567777494240922, 'temp_loss': 0.5846753159744872, 'temp': 0.823799723421621, 'alpha_loss': 9.490854245180275, 'alpha': 0.8716288250789308, 'critic_loss': 270.29922458581757, 'actor_loss': 52.22666471604018, 'time_step': 0.018014382200631483, 'td_error': 41.75616282761204, 'init_value': -82.22523498535156, 'ave_value': -47.64463781926815} step=3420
2022-04-20 18:59.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 18:59.53 [info     ] CQL_20220420185843: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003415722596017938, 'time_algorithm_update': 0.01759352739791424, 'temp_loss': 0.3798104112325182, 'temp': 0.815492295208033, 'alpha_loss': 10.36957904330471, 'alpha': 0.8362410319478888, 'critic_loss': 305.769174230029, 'actor_loss': 56.68153704816138, 'time_step': 0.018030375067950688, 'td_error': 46.734687842954784, 'init_value': -86.55348205566406, 'ave_value': -49.31416519755179} step=3762
2022-04-20 18:59.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.00 [info     ] CQL_20220420185843: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00034909708458080626, 'time_algorithm_update': 0.017530115027176708, 'temp_loss': 0.24441833817536196, 'temp': 0.8084764037913049, 'alpha_loss': 10.765045048897726, 'alpha': 0.8034395821261824, 'critic_loss': 345.5003769188597, 'actor_loss': 61.021196967677064, 'time_step': 0.017977003465619004, 'td_error': 59.675789787713185, 'init_value': -94.72187805175781, 'ave_value': -54.1622207843076} step=4104
2022-04-20 19:00.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.06 [info     ] CQL_20220420185843: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034917446604946204, 'time_algorithm_update': 0.01769474933021947, 'temp_loss': 0.14897061676898024, 'temp': 0.8038804841320417, 'alpha_loss': 11.321909320284748, 'alpha': 0.7721156028279087, 'critic_loss': 386.12682784231083, 'actor_loss': 65.2076165494863, 'time_step': 0.018141001288653813, 'td_error': 73.85971201636598, 'init_value': -104.55335998535156, 'ave_value': -59.24258531290534} step=4446
2022-04-20 19:00.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.13 [info     ] CQL_20220420185843: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003456483807480126, 'time_algorithm_update': 0.017823725415949235, 'temp_loss': 0.022596703194168923, 'temp': 0.8015273002504605, 'alpha_loss': 11.55673231018914, 'alpha': 0.7427912560122752, 'critic_loss': 427.73111702545344, 'actor_loss': 69.17883644327085, 'time_step': 0.01826595353801348, 'td_error': 75.2176604126266, 'init_value': -107.1894302368164, 'ave_value': -61.5691284265} step=4788
2022-04-20 19:00.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.19 [info     ] CQL_20220420185843: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035164858165540194, 'time_algorithm_update': 0.018032961421542697, 'temp_loss': -0.008202700543473338, 'temp': 0.8016702248687633, 'alpha_loss': 11.294589025932446, 'alpha': 0.7158596728280274, 'critic_loss': 469.48847988195587, 'actor_loss': 73.00663202966166, 'time_step': 0.018483420561628734, 'td_error': 107.29404976976114, 'init_value': -118.6264419555664, 'ave_value': -66.12115675733165} step=5130
2022-04-20 19:00.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.26 [info     ] CQL_20220420185843: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034687463303058466, 'time_algorithm_update': 0.017704240062780548, 'temp_loss': -0.08833156840342005, 'temp': 0.8029404786246562, 'alpha_loss': 11.443364154525668, 'alpha': 0.6907358915485137, 'critic_loss': 515.7088401749817, 'actor_loss': 76.8368339761656, 'time_step': 0.018149271345975108, 'td_error': 63.31153769748729, 'init_value': -120.004638671875, 'ave_value': -66.92667547619557} step=5472
2022-04-20 19:00.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.32 [info     ] CQL_20220420185843: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00035230876409519485, 'time_algorithm_update': 0.01793647509569313, 'temp_loss': -0.13987123649240585, 'temp': 0.8078479043572967, 'alpha_loss': 11.009464743541695, 'alpha': 0.6677556330697578, 'critic_loss': 544.7602667557566, 'actor_loss': 79.64045688562226, 'time_step': 0.018386269870557283, 'td_error': 98.30872710490772, 'init_value': -127.4163589477539, 'ave_value': -72.64448786885016} step=5814
2022-04-20 19:00.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.39 [info     ] CQL_20220420185843: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00035172944877579897, 'time_algorithm_update': 0.017907774239255672, 'temp_loss': -0.17632690469641782, 'temp': 0.8162849104543876, 'alpha_loss': 11.255802455701327, 'alpha': 0.6450650019255298, 'critic_loss': 582.4671497010348, 'actor_loss': 83.23523138838205, 'time_step': 0.018357313167282015, 'td_error': 91.73762434802634, 'init_value': -136.4934844970703, 'ave_value': -74.81283402891846} step=6156
2022-04-20 19:00.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.45 [info     ] CQL_20220420185843: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003479503051579347, 'time_algorithm_update': 0.017965257516381335, 'temp_loss': -0.24590651460035504, 'temp': 0.8274998739797469, 'alpha_loss': 12.298851904116178, 'alpha': 0.622505711010325, 'critic_loss': 628.6767390736362, 'actor_loss': 87.15494865283631, 'time_step': 0.01841051048702664, 'td_error': 153.95003689515383, 'init_value': -144.49484252929688, 'ave_value': -79.57860792255482} step=6498
2022-04-20 19:00.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.52 [info     ] CQL_20220420185843: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003536793223598547, 'time_algorithm_update': 0.017866784369039258, 'temp_loss': -0.23485028591246632, 'temp': 0.8433251631887335, 'alpha_loss': 10.277991278129711, 'alpha': 0.601108936189908, 'critic_loss': 676.4060718915616, 'actor_loss': 89.82273240117301, 'time_step': 0.018317783088014836, 'td_error': 89.73716598944691, 'init_value': -143.05484008789062, 'ave_value': -78.28781010771255} step=6840
2022-04-20 19:00.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:00.58 [info     ] CQL_20220420185843: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035463090528521624, 'time_algorithm_update': 0.01784076467592117, 'temp_loss': -0.2148087270536095, 'temp': 0.8552414499528227, 'alpha_loss': 10.057484615615934, 'alpha': 0.5836635521280835, 'critic_loss': 706.3919749120523, 'actor_loss': 92.10721420823482, 'time_step': 0.018295504893475804, 'td_error': 107.223235379014, 'init_value': -151.13345336914062, 'ave_value': -82.90189738268184} step=7182
2022-04-20 19:00.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.05 [info     ] CQL_20220420185843: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003529236330623515, 'time_algorithm_update': 0.018037492768806323, 'temp_loss': -0.23664934482229383, 'temp': 0.8709528413083818, 'alpha_loss': 10.421843778320223, 'alpha': 0.565170579486423, 'critic_loss': 748.7947519759686, 'actor_loss': 95.32137807210286, 'time_step': 0.018488616971244588, 'td_error': 147.03473312229727, 'init_value': -155.95558166503906, 'ave_value': -85.39740900943342} step=7524
2022-04-20 19:01.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.11 [info     ] CQL_20220420185843: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035329032362553113, 'time_algorithm_update': 0.01794628859960545, 'temp_loss': -0.20878512622538017, 'temp': 0.8845964079363304, 'alpha_loss': 9.98989136316623, 'alpha': 0.5473529360099145, 'critic_loss': 786.4800086662783, 'actor_loss': 97.95665555808976, 'time_step': 0.01839763379236411, 'td_error': 140.07488429755765, 'init_value': -160.55300903320312, 'ave_value': -88.16347258752464} step=7866
2022-04-20 19:01.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.18 [info     ] CQL_20220420185843: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003483623091937506, 'time_algorithm_update': 0.018129530705903705, 'temp_loss': -0.24459130760304063, 'temp': 0.9022587093344906, 'alpha_loss': 10.502812750855384, 'alpha': 0.5294914222948732, 'critic_loss': 822.0462350231862, 'actor_loss': 100.63628197831717, 'time_step': 0.018574107460111206, 'td_error': 149.57106609185936, 'init_value': -169.31503295898438, 'ave_value': -91.46276258301963} step=8208
2022-04-20 19:01.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.24 [info     ] CQL_20220420185843: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00034910126736289576, 'time_algorithm_update': 0.01810807094239352, 'temp_loss': -0.23566031859022135, 'temp': 0.9195768923787345, 'alpha_loss': 10.725507135279695, 'alpha': 0.5115132192422075, 'critic_loss': 865.7020520662006, 'actor_loss': 103.47425190886558, 'time_step': 0.018555031882392034, 'td_error': 148.84767358641054, 'init_value': -171.8671417236328, 'ave_value': -92.0024655905668} step=8550
2022-04-20 19:01.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.31 [info     ] CQL_20220420185843: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003487917414882727, 'time_algorithm_update': 0.018259800665559826, 'temp_loss': -0.2376248762556161, 'temp': 0.9355573058128357, 'alpha_loss': 11.028299436234592, 'alpha': 0.4942426933356893, 'critic_loss': 909.8570124754432, 'actor_loss': 106.42595987152635, 'time_step': 0.01870395705016733, 'td_error': 206.00164381786698, 'init_value': -180.33775329589844, 'ave_value': -95.14032383615235} step=8892
2022-04-20 19:01.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.38 [info     ] CQL_20220420185843: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035183959537082247, 'time_algorithm_update': 0.018111024683679058, 'temp_loss': -0.18570433143657028, 'temp': 0.951307265382064, 'alpha_loss': 10.773416289112024, 'alpha': 0.47764570383649124, 'critic_loss': 956.7466510973478, 'actor_loss': 109.17787009791324, 'time_step': 0.018558323731896472, 'td_error': 199.02836412600686, 'init_value': -184.7716064453125, 'ave_value': -97.25729884676299} step=9234
2022-04-20 19:01.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.44 [info     ] CQL_20220420185843: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034992109265243797, 'time_algorithm_update': 0.017930524391040467, 'temp_loss': -0.20597889436114775, 'temp': 0.9662059779055634, 'alpha_loss': 10.771868104823152, 'alpha': 0.4615579728494611, 'critic_loss': 1000.2407008834749, 'actor_loss': 111.70899543985288, 'time_step': 0.018375663729439006, 'td_error': 237.41134846975078, 'init_value': -195.07568359375, 'ave_value': -100.8904090520753} step=9576
2022-04-20 19:01.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.51 [info     ] CQL_20220420185843: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003560007664195278, 'time_algorithm_update': 0.01802326433839854, 'temp_loss': -0.16383145008867944, 'temp': 0.9790948022875869, 'alpha_loss': 11.354257031490928, 'alpha': 0.4460254275833654, 'critic_loss': 1049.5304224560832, 'actor_loss': 114.81133544654178, 'time_step': 0.018480012291356138, 'td_error': 347.13999016365847, 'init_value': -199.09732055664062, 'ave_value': -103.66095570776176} step=9918
2022-04-20 19:01.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:01.57 [info     ] CQL_20220420185843: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00035349876559965794, 'time_algorithm_update': 0.017780704805028368, 'temp_loss': -0.16877887918673762, 'temp': 0.9929530909884046, 'alpha_loss': 10.791154010951171, 'alpha': 0.4307441851723264, 'critic_loss': 1099.924207542375, 'actor_loss': 117.28733314826475, 'time_step': 0.01823161498844972, 'td_error': 278.4650187524091, 'init_value': -205.63900756835938, 'ave_value': -106.20144218411532} step=10260
2022-04-20 19:01.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.04 [info     ] CQL_20220420185843: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035585506617674353, 'time_algorithm_update': 0.01780498376366688, 'temp_loss': -0.18944997876904338, 'temp': 1.0069716588446969, 'alpha_loss': 11.182372409697862, 'alpha': 0.41646865193258253, 'critic_loss': 1151.4250795241685, 'actor_loss': 120.36465050323665, 'time_step': 0.018259659845229478, 'td_error': 355.7691727154358, 'init_value': -219.10232543945312, 'ave_value': -110.32674247199485} step=10602
2022-04-20 19:02.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.10 [info     ] CQL_20220420185843: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003477509258783352, 'time_algorithm_update': 0.01780197913186592, 'temp_loss': -0.16761468274638666, 'temp': 1.0197501468379595, 'alpha_loss': 11.300446957872625, 'alpha': 0.40221679655083437, 'critic_loss': 1204.1880402146724, 'actor_loss': 123.0606247974418, 'time_step': 0.018247965483637583, 'td_error': 337.70155524328374, 'init_value': -220.6544952392578, 'ave_value': -111.17822461364908} step=10944
2022-04-20 19:02.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.17 [info     ] CQL_20220420185843: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003516395189608747, 'time_algorithm_update': 0.01770807985673871, 'temp_loss': -0.12864898819454582, 'temp': 1.0336597868573596, 'alpha_loss': 10.978143821682846, 'alpha': 0.38888157869291584, 'critic_loss': 1262.5247749194764, 'actor_loss': 125.81435664215981, 'time_step': 0.018160021793075472, 'td_error': 371.4205013654073, 'init_value': -221.69638061523438, 'ave_value': -112.55313505904296} step=11286
2022-04-20 19:02.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.23 [info     ] CQL_20220420185843: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035101767869023554, 'time_algorithm_update': 0.017738750803540326, 'temp_loss': -0.05228366427210688, 'temp': 1.0405579993599339, 'alpha_loss': 10.097450968814872, 'alpha': 0.3764574552140041, 'critic_loss': 1300.9874720880164, 'actor_loss': 127.25589669657032, 'time_step': 0.018186401902583606, 'td_error': 440.61722869117574, 'init_value': -226.7242889404297, 'ave_value': -115.71048902384265} step=11628
2022-04-20 19:02.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.30 [info     ] CQL_20220420185843: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003517412999917192, 'time_algorithm_update': 0.017830854270890442, 'temp_loss': -0.048308734999893355, 'temp': 1.0434569029780159, 'alpha_loss': 10.299782513177883, 'alpha': 0.36473845625132845, 'critic_loss': 1338.5601985105993, 'actor_loss': 129.05030358743946, 'time_step': 0.018284788605762502, 'td_error': 486.5962540741864, 'init_value': -226.3203125, 'ave_value': -114.03587188430802} step=11970
2022-04-20 19:02.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.36 [info     ] CQL_20220420185843: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00034974541580467893, 'time_algorithm_update': 0.017684652094255415, 'temp_loss': -0.023159084212557787, 'temp': 1.0487891453051428, 'alpha_loss': 9.650981593550297, 'alpha': 0.35334125736303496, 'critic_loss': 1364.2532030964455, 'actor_loss': 130.09441266422382, 'time_step': 0.0181320822029783, 'td_error': 559.8449579830565, 'init_value': -234.3590850830078, 'ave_value': -119.7692914573365} step=12312
2022-04-20 19:02.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.43 [info     ] CQL_20220420185843: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00035767875916776603, 'time_algorithm_update': 0.017719714962251006, 'temp_loss': 0.0033096425257056776, 'temp': 1.0488861018454123, 'alpha_loss': 9.802852035265916, 'alpha': 0.34246645214264854, 'critic_loss': 1395.637542546144, 'actor_loss': 131.69315083821616, 'time_step': 0.018174748671682256, 'td_error': 504.3289391408006, 'init_value': -230.504638671875, 'ave_value': -117.91573760731129} step=12654
2022-04-20 19:02.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.49 [info     ] CQL_20220420185843: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003480499947977345, 'time_algorithm_update': 0.01783306626548544, 'temp_loss': 0.022268793148081206, 'temp': 1.0458746189959565, 'alpha_loss': 9.846325959378516, 'alpha': 0.33165474209869117, 'critic_loss': 1422.2570004825704, 'actor_loss': 132.56007777877718, 'time_step': 0.018280015354268035, 'td_error': 600.1666301545471, 'init_value': -225.744384765625, 'ave_value': -116.73396217872028} step=12996
2022-04-20 19:02.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:02.56 [info     ] CQL_20220420185843: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035209474507828207, 'time_algorithm_update': 0.017760315833733095, 'temp_loss': -0.031833421482503066, 'temp': 1.046372533890239, 'alpha_loss': 9.702862891537404, 'alpha': 0.3210212366099943, 'critic_loss': 1445.4177934970076, 'actor_loss': 133.8271823682283, 'time_step': 0.01820872401633458, 'td_error': 579.131471442743, 'init_value': -232.04330444335938, 'ave_value': -119.0090634076351} step=13338
2022-04-20 19:02.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.02 [info     ] CQL_20220420185843: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00035124354892306856, 'time_algorithm_update': 0.017764469336347972, 'temp_loss': 0.009511449924337934, 'temp': 1.048813149245859, 'alpha_loss': 9.439941739478307, 'alpha': 0.31076763256600026, 'critic_loss': 1468.2765524345532, 'actor_loss': 134.52795916551736, 'time_step': 0.01821396225377133, 'td_error': 685.4538559854852, 'init_value': -235.8571014404297, 'ave_value': -124.42995691727008} step=13680
2022-04-20 19:03.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.08 [info     ] CQL_20220420185843: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035154889201560215, 'time_algorithm_update': 0.017697042191934863, 'temp_loss': 0.07940395651875358, 'temp': 1.0455175077008922, 'alpha_loss': 8.747656055361206, 'alpha': 0.3011371849747429, 'critic_loss': 1490.4377651995385, 'actor_loss': 135.37428384078177, 'time_step': 0.0181440554167095, 'td_error': 376.7708093210329, 'init_value': -228.78042602539062, 'ave_value': -121.0284119352054} step=14022
2022-04-20 19:03.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.15 [info     ] CQL_20220420185843: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003564615695797212, 'time_algorithm_update': 0.017774928382962768, 'temp_loss': 0.13929178915395027, 'temp': 1.0354940103508576, 'alpha_loss': 8.149417348772461, 'alpha': 0.29251562634058165, 'critic_loss': 1498.6488811649076, 'actor_loss': 135.54800758584898, 'time_step': 0.01822740432114629, 'td_error': 522.7128712950885, 'init_value': -228.1420440673828, 'ave_value': -121.85708097769736} step=14364
2022-04-20 19:03.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.21 [info     ] CQL_20220420185843: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003497963063201012, 'time_algorithm_update': 0.017962205479716695, 'temp_loss': 0.09273508191108704, 'temp': 1.0250081458286933, 'alpha_loss': 7.77291653030797, 'alpha': 0.28403002598829435, 'critic_loss': 1496.2310705352247, 'actor_loss': 135.349359367326, 'time_step': 0.0184115429370724, 'td_error': 528.9806208530208, 'init_value': -230.33975219726562, 'ave_value': -123.59854327126868} step=14706
2022-04-20 19:03.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.28 [info     ] CQL_20220420185843: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00035143595689918563, 'time_algorithm_update': 0.017456076298540797, 'temp_loss': 0.13800255781616907, 'temp': 1.0142818431408085, 'alpha_loss': 7.2095116404762045, 'alpha': 0.2760780479824334, 'critic_loss': 1492.1765457956415, 'actor_loss': 135.13598559195535, 'time_step': 0.01790136412570351, 'td_error': 436.0121829683505, 'init_value': -222.2509765625, 'ave_value': -123.19044359750777} step=15048
2022-04-20 19:03.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.34 [info     ] CQL_20220420185843: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003458136006405479, 'time_algorithm_update': 0.01745253348211099, 'temp_loss': 0.11287979775092058, 'temp': 1.004447229598698, 'alpha_loss': 7.221675213317425, 'alpha': 0.2681399160309842, 'critic_loss': 1488.059540330318, 'actor_loss': 134.81435175667033, 'time_step': 0.017895458734523483, 'td_error': 420.6667657227625, 'init_value': -225.80410766601562, 'ave_value': -124.36604562577162} step=15390
2022-04-20 19:03.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.41 [info     ] CQL_20220420185843: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00034669128774899487, 'time_algorithm_update': 0.01749459623593336, 'temp_loss': 0.14343876485326137, 'temp': 0.9932086265575119, 'alpha_loss': 6.7069182172853345, 'alpha': 0.26043166002334905, 'critic_loss': 1483.3828635410955, 'actor_loss': 134.4215921993144, 'time_step': 0.017937377182363767, 'td_error': 291.3083242443714, 'init_value': -219.0309295654297, 'ave_value': -120.91857740121851} step=15732
2022-04-20 19:03.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.47 [info     ] CQL_20220420185843: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003493090122066743, 'time_algorithm_update': 0.017544538654082002, 'temp_loss': 0.12872331598174502, 'temp': 0.9808164737377948, 'alpha_loss': 5.69309111366495, 'alpha': 0.2535051665110895, 'critic_loss': 1470.8395168014438, 'actor_loss': 133.5731479577851, 'time_step': 0.017991838399429767, 'td_error': 515.8810849245782, 'init_value': -218.0394744873047, 'ave_value': -121.58642044217402} step=16074
2022-04-20 19:03.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:03.53 [info     ] CQL_20220420185843: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003512665542245608, 'time_algorithm_update': 0.017610864332544874, 'temp_loss': 0.07914090962612141, 'temp': 0.9711011092216648, 'alpha_loss': 6.327030194433112, 'alpha': 0.24654310876340196, 'critic_loss': 1459.9980375948007, 'actor_loss': 133.42804907637034, 'time_step': 0.018058666708873728, 'td_error': 359.16839118510495, 'init_value': -216.3308563232422, 'ave_value': -123.86821276678629} step=16416
2022-04-20 19:03.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:04.00 [info     ] CQL_20220420185843: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003453256093967728, 'time_algorithm_update': 0.017304597542299863, 'temp_loss': 0.13554342961407195, 'temp': 0.9619238017595302, 'alpha_loss': 5.732286081676595, 'alpha': 0.23938665478027354, 'critic_loss': 1454.2966283608598, 'actor_loss': 132.82639239127175, 'time_step': 0.017746907228614852, 'td_error': 354.4850505720958, 'init_value': -215.5140838623047, 'ave_value': -122.61874921687954} step=16758
2022-04-20 19:04.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:04.06 [info     ] CQL_20220420185843: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003429748858624732, 'time_algorithm_update': 0.01746087882950989, 'temp_loss': 0.11606776620648061, 'temp': 0.9497332525880713, 'alpha_loss': 5.387928266971432, 'alpha': 0.2327844731466115, 'critic_loss': 1439.3814861453764, 'actor_loss': 131.93176673309148, 'time_step': 0.017901633915148284, 'td_error': 423.21071310279524, 'init_value': -206.62155151367188, 'ave_value': -120.4508494219835} step=17100
2022-04-20 19:04.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420185843/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:04.07 [info     ] FQE_20220420190406: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00014069255462474068, 'time_algorithm_update': 0.0019394341161695578, 'loss': 0.007247575182509793, 'time_step': 0.002146248090065132, 'init_value': -0.455252081155777, 'ave_value': -0.3819799868015198, 'soft_opc': nan} step=177




2022-04-20 19:04.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.07 [info     ] FQE_20220420190406: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001413552774547857, 'time_algorithm_update': 0.0019338521580238126, 'loss': 0.006057386648299246, 'time_step': 0.0021470509006478694, 'init_value': -0.5846679210662842, 'ave_value': -0.46348310427533257, 'soft_opc': nan} step=354




2022-04-20 19:04.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.07 [info     ] FQE_20220420190406: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00014061173476741812, 'time_algorithm_update': 0.001974033097089347, 'loss': 0.005844260427816692, 'time_step': 0.00217563014919475, 'init_value': -0.6676757335662842, 'ave_value': -0.5075554191573962, 'soft_opc': nan} step=531




2022-04-20 19:04.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.08 [info     ] FQE_20220420190406: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00014532353244932358, 'time_algorithm_update': 0.0020439436206709867, 'loss': 0.00562875619879003, 'time_step': 0.0022575585855602546, 'init_value': -0.6979935765266418, 'ave_value': -0.5218234464391932, 'soft_opc': nan} step=708




2022-04-20 19:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.08 [info     ] FQE_20220420190406: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00014272113304353702, 'time_algorithm_update': 0.0020442574711169227, 'loss': 0.005551520585066686, 'time_step': 0.002247906000600696, 'init_value': -0.7283281683921814, 'ave_value': -0.5295188902689888, 'soft_opc': nan} step=885




2022-04-20 19:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.09 [info     ] FQE_20220420190406: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00014052148592674126, 'time_algorithm_update': 0.0020362859391896736, 'loss': 0.005219923509916259, 'time_step': 0.002236259859160515, 'init_value': -0.7693877816200256, 'ave_value': -0.5410469402891916, 'soft_opc': nan} step=1062




2022-04-20 19:04.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.09 [info     ] FQE_20220420190406: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00013721460676462637, 'time_algorithm_update': 0.0019283052218162407, 'loss': 0.0049009521210324125, 'time_step': 0.0021295076036183845, 'init_value': -0.8174257278442383, 'ave_value': -0.5559743080813964, 'soft_opc': nan} step=1239




2022-04-20 19:04.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.10 [info     ] FQE_20220420190406: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00014536798337085098, 'time_algorithm_update': 0.0020341280489991616, 'loss': 0.004555730524046694, 'time_step': 0.0022479100415935625, 'init_value': -0.8745644688606262, 'ave_value': -0.5910320865991595, 'soft_opc': nan} step=1416




2022-04-20 19:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.10 [info     ] FQE_20220420190406: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00013878789998717227, 'time_algorithm_update': 0.0019199120796332925, 'loss': 0.004525547092096449, 'time_step': 0.0021228103314415883, 'init_value': -0.8887373805046082, 'ave_value': -0.5842359195891265, 'soft_opc': nan} step=1593




2022-04-20 19:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.10 [info     ] FQE_20220420190406: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00014665571309752383, 'time_algorithm_update': 0.002012107331874007, 'loss': 0.004676160405884565, 'time_step': 0.002222323821762861, 'init_value': -0.9267441630363464, 'ave_value': -0.6076616572325227, 'soft_opc': nan} step=1770




2022-04-20 19:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.11 [info     ] FQE_20220420190406: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00014433887718761035, 'time_algorithm_update': 0.0019926526452188438, 'loss': 0.0044744877594570485, 'time_step': 0.002200102401992022, 'init_value': -0.9715423583984375, 'ave_value': -0.6078822108603701, 'soft_opc': nan} step=1947




2022-04-20 19:04.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.11 [info     ] FQE_20220420190406: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00014623679683706855, 'time_algorithm_update': 0.0021400801879537983, 'loss': 0.00430166108552156, 'time_step': 0.002356009294757735, 'init_value': -1.0250906944274902, 'ave_value': -0.6195676342420582, 'soft_opc': nan} step=2124




2022-04-20 19:04.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.12 [info     ] FQE_20220420190406: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00014077337448206324, 'time_algorithm_update': 0.0019483458524369925, 'loss': 0.004249559639095418, 'time_step': 0.0021532619066831084, 'init_value': -1.067376732826233, 'ave_value': -0.6540064345650912, 'soft_opc': nan} step=2301




2022-04-20 19:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.12 [info     ] FQE_20220420190406: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00014537606535658325, 'time_algorithm_update': 0.0020414220411225227, 'loss': 0.004808976656973025, 'time_step': 0.0022524130546440514, 'init_value': -1.1110056638717651, 'ave_value': -0.6655772649899807, 'soft_opc': nan} step=2478




2022-04-20 19:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.13 [info     ] FQE_20220420190406: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00014635129163494218, 'time_algorithm_update': 0.0021393164403021, 'loss': 0.004919729588190251, 'time_step': 0.002355916351921814, 'init_value': -1.255284309387207, 'ave_value': -0.7658622985733701, 'soft_opc': nan} step=2655




2022-04-20 19:04.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.13 [info     ] FQE_20220420190406: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00014875972338315458, 'time_algorithm_update': 0.002080082219872771, 'loss': 0.0052681846615179615, 'time_step': 0.0023027032108630164, 'init_value': -1.2933152914047241, 'ave_value': -0.7597450895299268, 'soft_opc': nan} step=2832




2022-04-20 19:04.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.14 [info     ] FQE_20220420190406: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00014371117629573843, 'time_algorithm_update': 0.002004257029732742, 'loss': 0.005512982371097462, 'time_step': 0.002212931207344357, 'init_value': -1.4036712646484375, 'ave_value': -0.8408282556589245, 'soft_opc': nan} step=3009




2022-04-20 19:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.14 [info     ] FQE_20220420190406: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.0001461276900296831, 'time_algorithm_update': 0.0020136550321417338, 'loss': 0.006079486292381745, 'time_step': 0.002225827362577794, 'init_value': -1.472567081451416, 'ave_value': -0.8677648273258898, 'soft_opc': nan} step=3186




2022-04-20 19:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.14 [info     ] FQE_20220420190406: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001446688916050108, 'time_algorithm_update': 0.001989469689837957, 'loss': 0.006076970771034011, 'time_step': 0.002198580294679114, 'init_value': -1.5570462942123413, 'ave_value': -0.8935136720820612, 'soft_opc': nan} step=3363




2022-04-20 19:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.15 [info     ] FQE_20220420190406: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00015040036648680263, 'time_algorithm_update': 0.002029099706876076, 'loss': 0.006779624205766677, 'time_step': 0.0022481013152558923, 'init_value': -1.6558138132095337, 'ave_value': -0.9435256655199324, 'soft_opc': nan} step=3540




2022-04-20 19:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.15 [info     ] FQE_20220420190406: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00014847820088014765, 'time_algorithm_update': 0.002068417220465881, 'loss': 0.007298017046309081, 'time_step': 0.002286894846770723, 'init_value': -1.7958554029464722, 'ave_value': -1.0325164447071495, 'soft_opc': nan} step=3717




2022-04-20 19:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.16 [info     ] FQE_20220420190406: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00014098754710396806, 'time_algorithm_update': 0.001973701735674325, 'loss': 0.008066741838435627, 'time_step': 0.0021803729278219624, 'init_value': -1.858938217163086, 'ave_value': -1.0761361207958455, 'soft_opc': nan} step=3894




2022-04-20 19:04.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.16 [info     ] FQE_20220420190406: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001456293009095273, 'time_algorithm_update': 0.002037510360028111, 'loss': 0.008824365045796385, 'time_step': 0.00225105393404341, 'init_value': -1.9823451042175293, 'ave_value': -1.1611870970364448, 'soft_opc': nan} step=4071




2022-04-20 19:04.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.17 [info     ] FQE_20220420190406: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00014330977100436972, 'time_algorithm_update': 0.0019743940924520547, 'loss': 0.008704019566382266, 'time_step': 0.0021850119876322773, 'init_value': -2.089617967605591, 'ave_value': -1.2216239188571234, 'soft_opc': nan} step=4248




2022-04-20 19:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.17 [info     ] FQE_20220420190406: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00014245712150961665, 'time_algorithm_update': 0.0020055353304760603, 'loss': 0.009578850902985298, 'time_step': 0.002219755097297625, 'init_value': -2.2279865741729736, 'ave_value': -1.3290737485563433, 'soft_opc': nan} step=4425




2022-04-20 19:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.17 [info     ] FQE_20220420190406: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001406992896128509, 'time_algorithm_update': 0.0019474002601063184, 'loss': 0.010142070940422094, 'time_step': 0.002153383136469092, 'init_value': -2.380342721939087, 'ave_value': -1.4541381446985868, 'soft_opc': nan} step=4602




2022-04-20 19:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.18 [info     ] FQE_20220420190406: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00014057805982686705, 'time_algorithm_update': 0.002009966952652581, 'loss': 0.011599717108601188, 'time_step': 0.0022161060807395117, 'init_value': -2.470154047012329, 'ave_value': -1.5190809469860238, 'soft_opc': nan} step=4779




2022-04-20 19:04.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.18 [info     ] FQE_20220420190406: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00014349161568334546, 'time_algorithm_update': 0.0020256082890397413, 'loss': 0.011777916325961297, 'time_step': 0.0022306657780361712, 'init_value': -2.541764259338379, 'ave_value': -1.4971419038491565, 'soft_opc': nan} step=4956




2022-04-20 19:04.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.19 [info     ] FQE_20220420190406: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00014317776523740952, 'time_algorithm_update': 0.002044951174892275, 'loss': 0.012581941554596448, 'time_step': 0.0022627391384146306, 'init_value': -2.578538656234741, 'ave_value': -1.533441120578541, 'soft_opc': nan} step=5133




2022-04-20 19:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.19 [info     ] FQE_20220420190406: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00014399269879874536, 'time_algorithm_update': 0.0019986090687035165, 'loss': 0.012812991543688763, 'time_step': 0.0022112043563928983, 'init_value': -2.7467031478881836, 'ave_value': -1.647488775656925, 'soft_opc': nan} step=5310




2022-04-20 19:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.20 [info     ] FQE_20220420190406: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00014264031318621446, 'time_algorithm_update': 0.0019750312223272806, 'loss': 0.013213483744963877, 'time_step': 0.00218490826881538, 'init_value': -2.7920644283294678, 'ave_value': -1.6553223092858498, 'soft_opc': nan} step=5487




2022-04-20 19:04.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.20 [info     ] FQE_20220420190406: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00014394285988672978, 'time_algorithm_update': 0.0019174982598945919, 'loss': 0.014902939631256386, 'time_step': 0.0021286482191355217, 'init_value': -2.893709421157837, 'ave_value': -1.683451775166097, 'soft_opc': nan} step=5664




2022-04-20 19:04.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.20 [info     ] FQE_20220420190406: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00014052956791247353, 'time_algorithm_update': 0.0019850097807113734, 'loss': 0.015483228658131287, 'time_step': 0.002190109026634087, 'init_value': -2.974557638168335, 'ave_value': -1.7361407761917906, 'soft_opc': nan} step=5841




2022-04-20 19:04.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.21 [info     ] FQE_20220420190406: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00014117477977343198, 'time_algorithm_update': 0.0019177663124213785, 'loss': 0.01571143408816329, 'time_step': 0.0021220021328683626, 'init_value': -3.0911881923675537, 'ave_value': -1.7949814297243472, 'soft_opc': nan} step=6018




2022-04-20 19:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.21 [info     ] FQE_20220420190406: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001456872218072751, 'time_algorithm_update': 0.002012301299531581, 'loss': 0.0164931219204008, 'time_step': 0.002226374243612343, 'init_value': -3.1221845149993896, 'ave_value': -1.7997668561566196, 'soft_opc': nan} step=6195




2022-04-20 19:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.22 [info     ] FQE_20220420190406: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00014938742427502647, 'time_algorithm_update': 0.002019144047451558, 'loss': 0.017615872064321223, 'time_step': 0.002237776578482935, 'init_value': -3.1759729385375977, 'ave_value': -1.813303595913893, 'soft_opc': nan} step=6372




2022-04-20 19:04.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.22 [info     ] FQE_20220420190406: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00014256757531462417, 'time_algorithm_update': 0.0019771621725653524, 'loss': 0.01844553926849138, 'time_step': 0.0021842926909021064, 'init_value': -3.2798306941986084, 'ave_value': -1.8773109200774125, 'soft_opc': nan} step=6549




2022-04-20 19:04.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.23 [info     ] FQE_20220420190406: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00014930525742008188, 'time_algorithm_update': 0.0021497327729133563, 'loss': 0.01814789904773467, 'time_step': 0.002369907616221972, 'init_value': -3.3421058654785156, 'ave_value': -1.8765742635874598, 'soft_opc': nan} step=6726




2022-04-20 19:04.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.23 [info     ] FQE_20220420190406: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00014506221491064728, 'time_algorithm_update': 0.0020164177242645437, 'loss': 0.019238493837400387, 'time_step': 0.002231150697180107, 'init_value': -3.488149881362915, 'ave_value': -2.0071038664850005, 'soft_opc': nan} step=6903




2022-04-20 19:04.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.24 [info     ] FQE_20220420190406: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00014247463247870322, 'time_algorithm_update': 0.001994266348370051, 'loss': 0.02042279141299995, 'time_step': 0.0022043144635561494, 'init_value': -3.503390073776245, 'ave_value': -1.9862611904836822, 'soft_opc': nan} step=7080




2022-04-20 19:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.24 [info     ] FQE_20220420190406: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00014227527683064088, 'time_algorithm_update': 0.0020204614111259157, 'loss': 0.020784781167104756, 'time_step': 0.002231630228333554, 'init_value': -3.5734989643096924, 'ave_value': -1.9847310301313574, 'soft_opc': nan} step=7257




2022-04-20 19:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.24 [info     ] FQE_20220420190406: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.0001453531663970085, 'time_algorithm_update': 0.0020690233693958, 'loss': 0.02139888085046057, 'time_step': 0.0022864678485245353, 'init_value': -3.7228050231933594, 'ave_value': -2.1018512496523356, 'soft_opc': nan} step=7434




2022-04-20 19:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.25 [info     ] FQE_20220420190406: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00014804716164109398, 'time_algorithm_update': 0.0020484156527761687, 'loss': 0.022335403963876097, 'time_step': 0.002265874948878746, 'init_value': -3.81707763671875, 'ave_value': -2.1446181901407573, 'soft_opc': nan} step=7611




2022-04-20 19:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.25 [info     ] FQE_20220420190406: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00014109800090897554, 'time_algorithm_update': 0.001916520339620989, 'loss': 0.023298884832726446, 'time_step': 0.002123345089497539, 'init_value': -3.8730456829071045, 'ave_value': -2.1401335099426744, 'soft_opc': nan} step=7788




2022-04-20 19:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.26 [info     ] FQE_20220420190406: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00014422572938735875, 'time_algorithm_update': 0.0020350426603845282, 'loss': 0.02414860995354836, 'time_step': 0.0022451109805349577, 'init_value': -3.878109931945801, 'ave_value': -2.1139282573224256, 'soft_opc': nan} step=7965




2022-04-20 19:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.26 [info     ] FQE_20220420190406: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00014083668337029926, 'time_algorithm_update': 0.0019552559502380717, 'loss': 0.024451799966305452, 'time_step': 0.002159374581891938, 'init_value': -4.000717639923096, 'ave_value': -2.1832507607927014, 'soft_opc': nan} step=8142




2022-04-20 19:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.27 [info     ] FQE_20220420190406: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00014472277150989253, 'time_algorithm_update': 0.002075563042850818, 'loss': 0.02523860115015953, 'time_step': 0.0022884923859504656, 'init_value': -4.072551250457764, 'ave_value': -2.245974722308003, 'soft_opc': nan} step=8319




2022-04-20 19:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.27 [info     ] FQE_20220420190406: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00014299861455367783, 'time_algorithm_update': 0.0020234234588967876, 'loss': 0.025695667570551573, 'time_step': 0.0022354072096657618, 'init_value': -4.136031627655029, 'ave_value': -2.26362129068939, 'soft_opc': nan} step=8496




2022-04-20 19:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.27 [info     ] FQE_20220420190406: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00014616405896547823, 'time_algorithm_update': 0.002030437275514764, 'loss': 0.02582516269442641, 'time_step': 0.002246220906575521, 'init_value': -4.155874252319336, 'ave_value': -2.2469536259560634, 'soft_opc': nan} step=8673




2022-04-20 19:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:04.28 [info     ] FQE_20220420190406: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00014454092683091675, 'time_algorithm_update': 0.00197526155892065, 'loss': 0.026669442051190272, 'time_step': 0.0021874217663781117, 'init_value': -4.1572651863098145, 'ave_value': -2.2126635013447444, 'soft_opc': nan} step=8850




2022-04-20 19:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190406/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:04.28 [debug    ] RoundIterator is selected.
2022-04-20 19:04.28 [info     ] Directory is created at d3rlpy_logs/FQE_20220420190428
2022-04-20 19:04.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:04.28 [debug    ] Building models...
2022-04-20 19:04.28 [debug    ] Models have been built.
2022-04-20 19:04.28 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420190428/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:04.29 [info     ] FQE_20220420190428: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00014742099961569143, 'time_algorithm_update': 0.002004809157792912, 'loss': 0.02458943459909236, 'time_step': 0.0022178195243658023, 'init_value': -1.6692348718643188, 'ave_value': -1.7086311834755246, 'soft_opc': nan} step=344




2022-04-20 19:04.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.30 [info     ] FQE_20220420190428: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014775991439819336, 'time_algorithm_update': 0.0019417696220930232, 'loss': 0.02409276809153515, 'time_step': 0.0021555021751758665, 'init_value': -2.4348459243774414, 'ave_value': -2.5137065320938556, 'soft_opc': nan} step=688




2022-04-20 19:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.31 [info     ] FQE_20220420190428: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00014680901239084643, 'time_algorithm_update': 0.001956437909325888, 'loss': 0.027645968193144993, 'time_step': 0.002168384402297264, 'init_value': -3.4402997493743896, 'ave_value': -3.595087196805456, 'soft_opc': nan} step=1032




2022-04-20 19:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.31 [info     ] FQE_20220420190428: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015049964882606683, 'time_algorithm_update': 0.002040076394413793, 'loss': 0.030306412180517474, 'time_step': 0.002262352510940197, 'init_value': -4.153754711151123, 'ave_value': -4.389738950160172, 'soft_opc': nan} step=1376




2022-04-20 19:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.32 [info     ] FQE_20220420190428: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00014863041944281999, 'time_algorithm_update': 0.00199712639631227, 'loss': 0.03685748215959689, 'time_step': 0.0022122090639070022, 'init_value': -5.005720138549805, 'ave_value': -5.376491173484304, 'soft_opc': nan} step=1720




2022-04-20 19:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.33 [info     ] FQE_20220420190428: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015279304149538972, 'time_algorithm_update': 0.002066305210424024, 'loss': 0.04255277548407659, 'time_step': 0.0022904124370841093, 'init_value': -5.619762897491455, 'ave_value': -6.072174385512198, 'soft_opc': nan} step=2064




2022-04-20 19:04.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.34 [info     ] FQE_20220420190428: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00014754159505977186, 'time_algorithm_update': 0.0020129950933678205, 'loss': 0.05009842619873843, 'time_step': 0.002229076485301173, 'init_value': -6.465324401855469, 'ave_value': -7.047684141537091, 'soft_opc': nan} step=2408




2022-04-20 19:04.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.35 [info     ] FQE_20220420190428: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00014958201452743177, 'time_algorithm_update': 0.0020345206870589147, 'loss': 0.059300921575794384, 'time_step': 0.0022541340007338415, 'init_value': -7.002833366394043, 'ave_value': -7.720322377563597, 'soft_opc': nan} step=2752




2022-04-20 19:04.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.36 [info     ] FQE_20220420190428: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00014969775843065838, 'time_algorithm_update': 0.002013659754464793, 'loss': 0.06778218971056388, 'time_step': 0.002233654953712641, 'init_value': -7.553940296173096, 'ave_value': -8.405436133130177, 'soft_opc': nan} step=3096




2022-04-20 19:04.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.37 [info     ] FQE_20220420190428: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00014887646187183469, 'time_algorithm_update': 0.002017283855482589, 'loss': 0.07892367515041558, 'time_step': 0.002234151890111524, 'init_value': -8.154718399047852, 'ave_value': -9.191189612085754, 'soft_opc': nan} step=3440




2022-04-20 19:04.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.37 [info     ] FQE_20220420190428: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00014962290608605674, 'time_algorithm_update': 0.001983585745789284, 'loss': 0.08824714931637742, 'time_step': 0.0022027790546417236, 'init_value': -8.489029884338379, 'ave_value': -9.689677141485987, 'soft_opc': nan} step=3784




2022-04-20 19:04.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.38 [info     ] FQE_20220420190428: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001487524010414301, 'time_algorithm_update': 0.0020080839478692344, 'loss': 0.09360440136041752, 'time_step': 0.002223389786343242, 'init_value': -8.841792106628418, 'ave_value': -10.291043316995776, 'soft_opc': nan} step=4128




2022-04-20 19:04.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.39 [info     ] FQE_20220420190428: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015071311662363452, 'time_algorithm_update': 0.0019912733588107797, 'loss': 0.1030481135023238, 'time_step': 0.0022081067395764726, 'init_value': -9.121789932250977, 'ave_value': -10.784480132981464, 'soft_opc': nan} step=4472




2022-04-20 19:04.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.40 [info     ] FQE_20220420190428: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001493255759394446, 'time_algorithm_update': 0.002014302930166555, 'loss': 0.11222623299190038, 'time_step': 0.0022370829138644906, 'init_value': -9.70911693572998, 'ave_value': -11.698487319226738, 'soft_opc': nan} step=4816




2022-04-20 19:04.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.41 [info     ] FQE_20220420190428: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00014730040417161098, 'time_algorithm_update': 0.0019289996734885282, 'loss': 0.12187012280933125, 'time_step': 0.00214233024175777, 'init_value': -9.88136100769043, 'ave_value': -12.052699584735407, 'soft_opc': nan} step=5160




2022-04-20 19:04.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.41 [info     ] FQE_20220420190428: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00014856319094813146, 'time_algorithm_update': 0.0020106497199036356, 'loss': 0.13014500819926345, 'time_step': 0.002225832883701768, 'init_value': -10.358461380004883, 'ave_value': -12.813861705805804, 'soft_opc': nan} step=5504




2022-04-20 19:04.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.42 [info     ] FQE_20220420190428: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015036796414574912, 'time_algorithm_update': 0.001986168151678041, 'loss': 0.14123359511249028, 'time_step': 0.002207865548688312, 'init_value': -10.556039810180664, 'ave_value': -13.24150008709581, 'soft_opc': nan} step=5848




2022-04-20 19:04.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.43 [info     ] FQE_20220420190428: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00014872121256451275, 'time_algorithm_update': 0.001998384331547937, 'loss': 0.14791673850701298, 'time_step': 0.0022194766721060108, 'init_value': -10.617931365966797, 'ave_value': -13.467145896817113, 'soft_opc': nan} step=6192




2022-04-20 19:04.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.44 [info     ] FQE_20220420190428: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001478895198467166, 'time_algorithm_update': 0.0020485270855038666, 'loss': 0.15987042522296135, 'time_step': 0.002267852079036624, 'init_value': -11.260379791259766, 'ave_value': -14.43159330471142, 'soft_opc': nan} step=6536




2022-04-20 19:04.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.45 [info     ] FQE_20220420190428: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015012677325758824, 'time_algorithm_update': 0.001966890207556791, 'loss': 0.1695060326750282, 'time_step': 0.002185940049415411, 'init_value': -11.472196578979492, 'ave_value': -14.82311480995771, 'soft_opc': nan} step=6880




2022-04-20 19:04.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.46 [info     ] FQE_20220420190428: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001503721226093381, 'time_algorithm_update': 0.0019996755344923152, 'loss': 0.1823678213594005, 'time_step': 0.0022227701752684835, 'init_value': -11.792705535888672, 'ave_value': -15.342355846714328, 'soft_opc': nan} step=7224




2022-04-20 19:04.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.46 [info     ] FQE_20220420190428: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015002835628598235, 'time_algorithm_update': 0.001985198536584544, 'loss': 0.20089498424339433, 'time_step': 0.0022059741408325907, 'init_value': -12.245109558105469, 'ave_value': -16.032869493478053, 'soft_opc': nan} step=7568




2022-04-20 19:04.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.47 [info     ] FQE_20220420190428: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001486692317696505, 'time_algorithm_update': 0.0020163364188615666, 'loss': 0.21775524049109324, 'time_step': 0.002235212298326714, 'init_value': -12.204797744750977, 'ave_value': -16.25209483310983, 'soft_opc': nan} step=7912




2022-04-20 19:04.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.48 [info     ] FQE_20220420190428: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015161966168603233, 'time_algorithm_update': 0.0019550205663193105, 'loss': 0.22771254427146254, 'time_step': 0.0021730481192123057, 'init_value': -12.866708755493164, 'ave_value': -17.11650847518766, 'soft_opc': nan} step=8256




2022-04-20 19:04.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.49 [info     ] FQE_20220420190428: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00014573543570762457, 'time_algorithm_update': 0.001947433449501215, 'loss': 0.2393926665739178, 'time_step': 0.0021631475104842077, 'init_value': -12.9920015335083, 'ave_value': -17.39220158226855, 'soft_opc': nan} step=8600




2022-04-20 19:04.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.50 [info     ] FQE_20220420190428: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001501510309618573, 'time_algorithm_update': 0.0020082994948985963, 'loss': 0.2521689654988518, 'time_step': 0.0022293433200481325, 'init_value': -13.07907485961914, 'ave_value': -17.71419184497885, 'soft_opc': nan} step=8944




2022-04-20 19:04.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.51 [info     ] FQE_20220420190428: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001481452653574389, 'time_algorithm_update': 0.0019709481749423715, 'loss': 0.25546120642142933, 'time_step': 0.0021873372931813084, 'init_value': -13.437389373779297, 'ave_value': -18.216450242845863, 'soft_opc': nan} step=9288




2022-04-20 19:04.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.51 [info     ] FQE_20220420190428: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.000148561111716337, 'time_algorithm_update': 0.001985810523809389, 'loss': 0.26883731101812836, 'time_step': 0.0022033647049305052, 'init_value': -13.83146858215332, 'ave_value': -18.70066385451738, 'soft_opc': nan} step=9632




2022-04-20 19:04.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.52 [info     ] FQE_20220420190428: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00014880923337714616, 'time_algorithm_update': 0.001983919115953667, 'loss': 0.2795714683245954, 'time_step': 0.0022028040054232574, 'init_value': -13.614948272705078, 'ave_value': -18.611251113919526, 'soft_opc': nan} step=9976




2022-04-20 19:04.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.53 [info     ] FQE_20220420190428: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015198699263639228, 'time_algorithm_update': 0.0020555250866468562, 'loss': 0.2885989494145263, 'time_step': 0.002280336479808009, 'init_value': -13.848531723022461, 'ave_value': -19.102181996794435, 'soft_opc': nan} step=10320




2022-04-20 19:04.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.54 [info     ] FQE_20220420190428: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00014805585839027582, 'time_algorithm_update': 0.001956433057785034, 'loss': 0.28945975856812195, 'time_step': 0.002171472061512082, 'init_value': -13.902545928955078, 'ave_value': -19.24048011533849, 'soft_opc': nan} step=10664




2022-04-20 19:04.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.55 [info     ] FQE_20220420190428: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001485590324845425, 'time_algorithm_update': 0.0019770895325860313, 'loss': 0.3025300344452262, 'time_step': 0.002194437869759493, 'init_value': -14.088977813720703, 'ave_value': -19.598228842825502, 'soft_opc': nan} step=11008




2022-04-20 19:04.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.56 [info     ] FQE_20220420190428: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015032915181891862, 'time_algorithm_update': 0.0020415110643519914, 'loss': 0.3177645119491878, 'time_step': 0.0022640193617621132, 'init_value': -14.531124114990234, 'ave_value': -20.14925365351342, 'soft_opc': nan} step=11352




2022-04-20 19:04.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.56 [info     ] FQE_20220420190428: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00014984607696533203, 'time_algorithm_update': 0.0019788554934568182, 'loss': 0.3270211187678628, 'time_step': 0.0021985644517942917, 'init_value': -14.698770523071289, 'ave_value': -20.28933725453712, 'soft_opc': nan} step=11696




2022-04-20 19:04.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.57 [info     ] FQE_20220420190428: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001517478809800259, 'time_algorithm_update': 0.0020368584366731867, 'loss': 0.33739502221714096, 'time_step': 0.0022603252599405687, 'init_value': -14.77498722076416, 'ave_value': -20.494257695610457, 'soft_opc': nan} step=12040




2022-04-20 19:04.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.58 [info     ] FQE_20220420190428: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015000201934991882, 'time_algorithm_update': 0.0019935563553211302, 'loss': 0.3453111757015333, 'time_step': 0.002209799927334453, 'init_value': -14.52370548248291, 'ave_value': -20.288641117983037, 'soft_opc': nan} step=12384




2022-04-20 19:04.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:04.59 [info     ] FQE_20220420190428: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00014779664749322937, 'time_algorithm_update': 0.0019933137782784396, 'loss': 0.3539372374422762, 'time_step': 0.0022108215232228123, 'init_value': -14.602794647216797, 'ave_value': -20.581810467952007, 'soft_opc': nan} step=12728




2022-04-20 19:04.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.00 [info     ] FQE_20220420190428: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001475076342737952, 'time_algorithm_update': 0.00196112172548161, 'loss': 0.3665434675518597, 'time_step': 0.0021798901779707088, 'init_value': -14.946085929870605, 'ave_value': -21.0323092291484, 'soft_opc': nan} step=13072




2022-04-20 19:05.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.01 [info     ] FQE_20220420190428: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00014542424401571585, 'time_algorithm_update': 0.001971904621567837, 'loss': 0.38039843029299275, 'time_step': 0.002186385005019432, 'init_value': -15.262529373168945, 'ave_value': -21.59072188010087, 'soft_opc': nan} step=13416




2022-04-20 19:05.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.01 [info     ] FQE_20220420190428: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00014943646830181743, 'time_algorithm_update': 0.0019795111445493475, 'loss': 0.39296450686835965, 'time_step': 0.00219901911048002, 'init_value': -15.21231460571289, 'ave_value': -21.653965079408508, 'soft_opc': nan} step=13760




2022-04-20 19:05.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.02 [info     ] FQE_20220420190428: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001517804556114729, 'time_algorithm_update': 0.00203190709269324, 'loss': 0.40887870469000626, 'time_step': 0.002252867748570997, 'init_value': -15.452810287475586, 'ave_value': -21.89019485652984, 'soft_opc': nan} step=14104




2022-04-20 19:05.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.03 [info     ] FQE_20220420190428: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015048925266709438, 'time_algorithm_update': 0.001996967681618624, 'loss': 0.4120151107795103, 'time_step': 0.002216862384663072, 'init_value': -15.57403564453125, 'ave_value': -22.13440630076168, 'soft_opc': nan} step=14448




2022-04-20 19:05.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.04 [info     ] FQE_20220420190428: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00014953003373256949, 'time_algorithm_update': 0.001985767553018969, 'loss': 0.42483026962077547, 'time_step': 0.00220426223998846, 'init_value': -15.72160530090332, 'ave_value': -22.152424424269178, 'soft_opc': nan} step=14792




2022-04-20 19:05.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.05 [info     ] FQE_20220420190428: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001482513061789579, 'time_algorithm_update': 0.001965951087862946, 'loss': 0.4390580985521854, 'time_step': 0.0021841096323589946, 'init_value': -15.827685356140137, 'ave_value': -22.18827004260845, 'soft_opc': nan} step=15136




2022-04-20 19:05.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.06 [info     ] FQE_20220420190428: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001502106356066327, 'time_algorithm_update': 0.002016738403675168, 'loss': 0.45108214241647443, 'time_step': 0.0022378952004188713, 'init_value': -15.966439247131348, 'ave_value': -22.32495908855318, 'soft_opc': nan} step=15480




2022-04-20 19:05.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.06 [info     ] FQE_20220420190428: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001530252223791078, 'time_algorithm_update': 0.002066089663394662, 'loss': 0.44824965258648747, 'time_step': 0.0022906141225681746, 'init_value': -15.728246688842773, 'ave_value': -22.210270765504323, 'soft_opc': nan} step=15824




2022-04-20 19:05.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.07 [info     ] FQE_20220420190428: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001495231029599212, 'time_algorithm_update': 0.002015603836192641, 'loss': 0.4566537835581098, 'time_step': 0.002232867617939794, 'init_value': -15.98261833190918, 'ave_value': -22.459491362362293, 'soft_opc': nan} step=16168




2022-04-20 19:05.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.08 [info     ] FQE_20220420190428: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00014895339344823085, 'time_algorithm_update': 0.002034265634625457, 'loss': 0.46526457580499525, 'time_step': 0.002252760321594948, 'init_value': -15.685144424438477, 'ave_value': -22.250294271984078, 'soft_opc': nan} step=16512




2022-04-20 19:05.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.09 [info     ] FQE_20220420190428: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015193570491879485, 'time_algorithm_update': 0.0020069951234861863, 'loss': 0.46245292444215264, 'time_step': 0.002228362615718398, 'init_value': -15.707860946655273, 'ave_value': -22.14150972450907, 'soft_opc': nan} step=16856




2022-04-20 19:05.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:05.10 [info     ] FQE_20220420190428: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015318324399548908, 'time_algorithm_update': 0.002108454011207403, 'loss': 0.46932313756389155, 'time_step': 0.0023345406665358434, 'init_value': -15.799030303955078, 'ave_value': -22.107451021402806, 'soft_opc': nan} step=17200




2022-04-20 19:05.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420190428/model_17200.pt
search iteration:  24
using hyper params:  [0.009186477001427513, 0.002674755160600311, 1.7247007814318917e-05, 5]
2022-04-20 19:05.10 [debug    ] RoundIterator is selected.
2022-04-20 19:05.10 [info     ] Directory is created at d3rlpy_logs/CQL_20220420190510
2022-04-20 19:05.10 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:05.10 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:05.10 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420190510/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009186477001427513, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.16 [info     ] CQL_20220420190510: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003467637893052129, 'time_algorithm_update': 0.017093889894541244, 'temp_loss': 4.557675393352731, 'temp': 0.9973077228543354, 'alpha_loss': -17.181233255486738, 'alpha': 1.017451424696292, 'critic_loss': 50.95765640861109, 'actor_loss': 3.329456817336947, 'time_step': 0.017534327785871183, 'td_error': 3.0349488571163747, 'init_value': -7.3535637855529785, 'ave_value': -5.204035982928902} step=342
2022-04-20 19:05.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.22 [info     ] CQL_20220420190510: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.000341257853814733, 'time_algorithm_update': 0.01698202347894858, 'temp_loss': 4.433857329407631, 'temp': 0.9915770124970821, 'alpha_loss': -11.079297305547703, 'alpha': 1.0469368169879356, 'critic_loss': 25.81441763269971, 'actor_loss': 7.598386174754093, 'time_step': 0.017421718229327286, 'td_error': 3.7346173590063727, 'init_value': -16.003698348999023, 'ave_value': -9.598506540926593} step=684
2022-04-20 19:05.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.28 [info     ] CQL_20220420190510: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033357687163771245, 'time_algorithm_update': 0.016668431940134506, 'temp_loss': 3.96312805923105, 'temp': 0.9861385508587486, 'alpha_loss': -7.601256705167001, 'alpha': 1.0704853485899362, 'critic_loss': 27.5976369980483, 'actor_loss': 13.06310564732691, 'time_step': 0.017094290744491487, 'td_error': 5.869925054748081, 'init_value': -24.554105758666992, 'ave_value': -14.480789806382345} step=1026
2022-04-20 19:05.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.34 [info     ] CQL_20220420190510: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00032604228683382445, 'time_algorithm_update': 0.0163209675348293, 'temp_loss': 3.604304045264484, 'temp': 0.980985555906742, 'alpha_loss': -5.674833957911932, 'alpha': 1.0917255630967213, 'critic_loss': 36.22952506974427, 'actor_loss': 18.547154981490465, 'time_step': 0.01673779571265505, 'td_error': 8.025769614316175, 'init_value': -32.14344024658203, 'ave_value': -18.82340412157915} step=1368
2022-04-20 19:05.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.41 [info     ] CQL_20220420190510: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.000344233903271413, 'time_algorithm_update': 0.017117052050361858, 'temp_loss': 3.3292046625014633, 'temp': 0.9760065359330317, 'alpha_loss': -4.120215236443525, 'alpha': 1.110839857344042, 'critic_loss': 48.52698724311695, 'actor_loss': 23.882669114229973, 'time_step': 0.017552439232318723, 'td_error': 10.74431452985285, 'init_value': -40.18931198120117, 'ave_value': -23.378810362540655} step=1710
2022-04-20 19:05.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.46 [info     ] CQL_20220420190510: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003265728030288429, 'time_algorithm_update': 0.01624300535659344, 'temp_loss': 3.0859547748900296, 'temp': 0.9711445986527448, 'alpha_loss': -2.7568058616193065, 'alpha': 1.1269124091020104, 'critic_loss': 62.50484746520282, 'actor_loss': 28.791556397376702, 'time_step': 0.016663650323075856, 'td_error': 13.882558748859024, 'init_value': -46.626399993896484, 'ave_value': -27.45099226948914} step=2052
2022-04-20 19:05.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.52 [info     ] CQL_20220420190510: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003341485185232776, 'time_algorithm_update': 0.016455890142429642, 'temp_loss': 2.8648939404571268, 'temp': 0.9663770301648748, 'alpha_loss': -1.5491277672808155, 'alpha': 1.1390803665445561, 'critic_loss': 78.24946986862093, 'actor_loss': 33.4088760175203, 'time_step': 0.016883478527180633, 'td_error': 17.395561012921775, 'init_value': -53.519386291503906, 'ave_value': -31.10232806607678} step=2394
2022-04-20 19:05.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:05.59 [info     ] CQL_20220420190510: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034617541129128976, 'time_algorithm_update': 0.01741913117860493, 'temp_loss': 2.6555479346660147, 'temp': 0.9617048074976046, 'alpha_loss': -0.44255300472648434, 'alpha': 1.1457266650701825, 'critic_loss': 93.7301945379603, 'actor_loss': 37.70702217057435, 'time_step': 0.01786122893729405, 'td_error': 21.150024651502925, 'init_value': -59.6470947265625, 'ave_value': -35.41429964319878} step=2736
2022-04-20 19:05.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.05 [info     ] CQL_20220420190510: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00034889700817085844, 'time_algorithm_update': 0.017590948015625715, 'temp_loss': 2.481211743159601, 'temp': 0.9570988348701543, 'alpha_loss': 0.5808530601602026, 'alpha': 1.14520937274074, 'critic_loss': 109.58141645911144, 'actor_loss': 41.619246884396205, 'time_step': 0.018037862247890897, 'td_error': 23.79699174787568, 'init_value': -65.0181655883789, 'ave_value': -37.73709333913149} step=3078
2022-04-20 19:06.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.12 [info     ] CQL_20220420190510: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00034879104435792444, 'time_algorithm_update': 0.01773727149294134, 'temp_loss': 2.3059893058754546, 'temp': 0.9525609638607293, 'alpha_loss': 1.5564025586841312, 'alpha': 1.134588535069025, 'critic_loss': 123.48541565387569, 'actor_loss': 45.34767324882641, 'time_step': 0.018180380090635422, 'td_error': 27.116794472077835, 'init_value': -70.42195892333984, 'ave_value': -41.229887858457396} step=3420
2022-04-20 19:06.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.18 [info     ] CQL_20220420190510: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003496052926046806, 'time_algorithm_update': 0.017535644665099027, 'temp_loss': 2.170821802309382, 'temp': 0.9480432090703507, 'alpha_loss': 2.406619593849656, 'alpha': 1.113665625017289, 'critic_loss': 137.4909477903132, 'actor_loss': 48.71808722423531, 'time_step': 0.017980613206562242, 'td_error': 30.785767647816993, 'init_value': -75.53791809082031, 'ave_value': -44.63846854463891} step=3762
2022-04-20 19:06.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.24 [info     ] CQL_20220420190510: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00034939545636985733, 'time_algorithm_update': 0.017691182114227474, 'temp_loss': 2.0396605838111967, 'temp': 0.94357646104188, 'alpha_loss': 3.1440994811859744, 'alpha': 1.0822058131820278, 'critic_loss': 151.43499827245523, 'actor_loss': 51.86323990180478, 'time_step': 0.01814241646326076, 'td_error': 33.682991465682434, 'init_value': -79.17084503173828, 'ave_value': -46.48437534322744} step=4104
2022-04-20 19:06.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.31 [info     ] CQL_20220420190510: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003448292525888186, 'time_algorithm_update': 0.01756726370917426, 'temp_loss': 1.930220189150314, 'temp': 0.939109941846446, 'alpha_loss': 3.784041379278863, 'alpha': 1.0439962451918083, 'critic_loss': 165.13520375748126, 'actor_loss': 54.872893071314046, 'time_step': 0.018009136294760898, 'td_error': 36.94229200697917, 'init_value': -83.75052642822266, 'ave_value': -49.20460858132925} step=4446
2022-04-20 19:06.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.37 [info     ] CQL_20220420190510: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003475348154703776, 'time_algorithm_update': 0.017332145345141315, 'temp_loss': 1.8272076500089545, 'temp': 0.9346298777569108, 'alpha_loss': 4.270529081249795, 'alpha': 1.003916302096774, 'critic_loss': 179.4610937464307, 'actor_loss': 57.649755834836014, 'time_step': 0.017777555170114975, 'td_error': 39.042821076603495, 'init_value': -87.14491271972656, 'ave_value': -51.13542430320033} step=4788
2022-04-20 19:06.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.43 [info     ] CQL_20220420190510: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003503393708613881, 'time_algorithm_update': 0.017374639622649256, 'temp_loss': 1.725328683156019, 'temp': 0.9301845553325631, 'alpha_loss': 4.65535394106692, 'alpha': 0.9644578634298335, 'critic_loss': 193.81960724947746, 'actor_loss': 60.289454521491514, 'time_step': 0.017819640929238836, 'td_error': 42.52914917790501, 'init_value': -92.687744140625, 'ave_value': -54.56565676432942} step=5130
2022-04-20 19:06.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.50 [info     ] CQL_20220420190510: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034875549071016366, 'time_algorithm_update': 0.0173320121932448, 'temp_loss': 1.6400550511496805, 'temp': 0.925694894895219, 'alpha_loss': 4.951394859113191, 'alpha': 0.9264869233321028, 'critic_loss': 207.88799058345327, 'actor_loss': 62.701247053536754, 'time_step': 0.017780626029299015, 'td_error': 45.60075215647083, 'init_value': -96.27703857421875, 'ave_value': -56.74740654143396} step=5472
2022-04-20 19:06.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:06.56 [info     ] CQL_20220420190510: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003410877540097599, 'time_algorithm_update': 0.017257799182021825, 'temp_loss': 1.5310702540023982, 'temp': 0.9212772234490043, 'alpha_loss': 5.189287620678282, 'alpha': 0.8909210247254511, 'critic_loss': 222.1402450472291, 'actor_loss': 64.9648622211657, 'time_step': 0.01769363252740157, 'td_error': 46.10592887640065, 'init_value': -99.4718017578125, 'ave_value': -58.600256562784715} step=5814
2022-04-20 19:06.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.02 [info     ] CQL_20220420190510: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003505004079718339, 'time_algorithm_update': 0.017277643694515118, 'temp_loss': 1.4420708236638566, 'temp': 0.9169011124741961, 'alpha_loss': 5.33904448796434, 'alpha': 0.8570796645175643, 'critic_loss': 236.9674032110917, 'actor_loss': 67.07369952731662, 'time_step': 0.01772508356306288, 'td_error': 47.7473297128315, 'init_value': -102.08651733398438, 'ave_value': -59.531463621944724} step=6156
2022-04-20 19:07.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.09 [info     ] CQL_20220420190510: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035294524410314727, 'time_algorithm_update': 0.01752883091307523, 'temp_loss': 1.3729085641646246, 'temp': 0.9125646357996422, 'alpha_loss': 5.4801279369153475, 'alpha': 0.8252793123498995, 'critic_loss': 250.4684830604241, 'actor_loss': 68.99486458092404, 'time_step': 0.017980561618916473, 'td_error': 48.87011136806271, 'init_value': -103.51960754394531, 'ave_value': -60.83530391265143} step=6498
2022-04-20 19:07.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.15 [info     ] CQL_20220420190510: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034925393890916255, 'time_algorithm_update': 0.017282514544258342, 'temp_loss': 1.341064478395975, 'temp': 0.9081650953195248, 'alpha_loss': 5.609312836886846, 'alpha': 0.7951689369497243, 'critic_loss': 263.99395502101606, 'actor_loss': 70.93717056408263, 'time_step': 0.017728662630270797, 'td_error': 52.584791911829164, 'init_value': -109.3301010131836, 'ave_value': -64.19308476751183} step=6840
2022-04-20 19:07.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.21 [info     ] CQL_20220420190510: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003470691323977465, 'time_algorithm_update': 0.017330974863286604, 'temp_loss': 1.260167008777808, 'temp': 0.903711229737042, 'alpha_loss': 5.6631383073260215, 'alpha': 0.7667537795172797, 'critic_loss': 278.22560307017545, 'actor_loss': 72.63042932923076, 'time_step': 0.0177754331053349, 'td_error': 53.00737384512237, 'init_value': -110.6044921875, 'ave_value': -64.30803382382737} step=7182
2022-04-20 19:07.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.28 [info     ] CQL_20220420190510: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003454824637251291, 'time_algorithm_update': 0.017494739844785098, 'temp_loss': 1.190475072428497, 'temp': 0.8993526294566038, 'alpha_loss': 5.687588548799704, 'alpha': 0.7398706538635388, 'critic_loss': 291.065026132684, 'actor_loss': 74.28877517075567, 'time_step': 0.017933478132326004, 'td_error': 53.7339225960255, 'init_value': -112.23661804199219, 'ave_value': -66.80243932190511} step=7524
2022-04-20 19:07.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.34 [info     ] CQL_20220420190510: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035037771303054186, 'time_algorithm_update': 0.017343121662474516, 'temp_loss': 1.1324290938196127, 'temp': 0.8950044443035683, 'alpha_loss': 5.686971865202251, 'alpha': 0.7141408081988843, 'critic_loss': 303.6764960930361, 'actor_loss': 75.79880746763352, 'time_step': 0.017789574394449156, 'td_error': 56.83996954127347, 'init_value': -116.9879379272461, 'ave_value': -68.48571137349482} step=7866
2022-04-20 19:07.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.40 [info     ] CQL_20220420190510: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00034890885938677874, 'time_algorithm_update': 0.017277587924087255, 'temp_loss': 1.0940143844189003, 'temp': 0.8906245778875741, 'alpha_loss': 5.681571375556857, 'alpha': 0.6897297342966872, 'critic_loss': 315.68733081483003, 'actor_loss': 77.19282728050187, 'time_step': 0.01772395560615941, 'td_error': 57.00932510236987, 'init_value': -118.92045593261719, 'ave_value': -69.63696099837487} step=8208
2022-04-20 19:07.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.47 [info     ] CQL_20220420190510: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003483581264116611, 'time_algorithm_update': 0.01725999305122777, 'temp_loss': 1.0357274708517812, 'temp': 0.8862582436430524, 'alpha_loss': 5.614299762318706, 'alpha': 0.6664548829982155, 'critic_loss': 327.5365629140397, 'actor_loss': 78.47639591373198, 'time_step': 0.017704986689383525, 'td_error': 56.36030945132982, 'init_value': -119.7673568725586, 'ave_value': -69.83406169943846} step=8550
2022-04-20 19:07.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.53 [info     ] CQL_20220420190510: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00034733543619077804, 'time_algorithm_update': 0.01728416813744439, 'temp_loss': 0.9778476495143266, 'temp': 0.8819958684388657, 'alpha_loss': 5.56793233037692, 'alpha': 0.6442308626328296, 'critic_loss': 338.8294270833333, 'actor_loss': 79.75846037390636, 'time_step': 0.01772833079622503, 'td_error': 59.339609544187, 'init_value': -122.15190124511719, 'ave_value': -71.14133492445758} step=8892
2022-04-20 19:07.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:07.59 [info     ] CQL_20220420190510: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00034704752135695073, 'time_algorithm_update': 0.017295852042081065, 'temp_loss': 0.9281937792374376, 'temp': 0.8777750843798208, 'alpha_loss': 5.521726500918294, 'alpha': 0.622961101476212, 'critic_loss': 351.37845937271567, 'actor_loss': 81.07912987156918, 'time_step': 0.017739129345319425, 'td_error': 59.69006809137168, 'init_value': -125.2105941772461, 'ave_value': -72.13069376978117} step=9234
2022-04-20 19:07.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.05 [info     ] CQL_20220420190510: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003465246735957631, 'time_algorithm_update': 0.01720816559261746, 'temp_loss': 0.8764897150080107, 'temp': 0.8735158859986311, 'alpha_loss': 5.522587045591477, 'alpha': 0.6024237801457009, 'critic_loss': 362.6501370257104, 'actor_loss': 82.16360205934758, 'time_step': 0.01764998380203693, 'td_error': 60.3353282350454, 'init_value': -124.62313079833984, 'ave_value': -73.03912823708073} step=9576
2022-04-20 19:08.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.12 [info     ] CQL_20220420190510: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034924278482359054, 'time_algorithm_update': 0.017549565661023236, 'temp_loss': 0.7974150481563039, 'temp': 0.8694195438895309, 'alpha_loss': 5.47148070767609, 'alpha': 0.5825624912105806, 'critic_loss': 372.77551938776384, 'actor_loss': 83.28984170210988, 'time_step': 0.017999198004516246, 'td_error': 60.997001565969086, 'init_value': -126.92266845703125, 'ave_value': -73.40907017239579} step=9918
2022-04-20 19:08.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.18 [info     ] CQL_20220420190510: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000346198416592782, 'time_algorithm_update': 0.017387854425530685, 'temp_loss': 0.7681231718532174, 'temp': 0.8653866192053633, 'alpha_loss': 5.362575388791268, 'alpha': 0.5635702350334815, 'critic_loss': 382.14154873675074, 'actor_loss': 84.21474860565007, 'time_step': 0.017834040853712294, 'td_error': 60.23728401948859, 'init_value': -128.3541717529297, 'ave_value': -75.202434829546} step=10260
2022-04-20 19:08.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.25 [info     ] CQL_20220420190510: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034913194109821875, 'time_algorithm_update': 0.017564652258889715, 'temp_loss': 0.7509216472332241, 'temp': 0.86131039401244, 'alpha_loss': 5.22883365307635, 'alpha': 0.5453423441153521, 'critic_loss': 392.42843146073193, 'actor_loss': 85.04663752951818, 'time_step': 0.018010915371409635, 'td_error': 58.27754396649407, 'init_value': -129.3815460205078, 'ave_value': -74.77911801623935} step=10602
2022-04-20 19:08.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.31 [info     ] CQL_20220420190510: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003485017352634006, 'time_algorithm_update': 0.017614804513273182, 'temp_loss': 0.68477125977825, 'temp': 0.8573257405506937, 'alpha_loss': 5.1908399975090695, 'alpha': 0.5278222560882568, 'critic_loss': 403.1825240285773, 'actor_loss': 85.97782973797001, 'time_step': 0.01806055732637818, 'td_error': 58.557339840980525, 'init_value': -131.66429138183594, 'ave_value': -77.24102991524983} step=10944
2022-04-20 19:08.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.38 [info     ] CQL_20220420190510: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003542621233309919, 'time_algorithm_update': 0.017757408103050543, 'temp_loss': 0.6863974236736172, 'temp': 0.8532600974478917, 'alpha_loss': 5.007791313511587, 'alpha': 0.5110076069831848, 'critic_loss': 413.0057585420664, 'actor_loss': 86.73701340970938, 'time_step': 0.018210765213994253, 'td_error': 58.003550627665085, 'init_value': -130.75302124023438, 'ave_value': -76.18207510247021} step=11286
2022-04-20 19:08.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.44 [info     ] CQL_20220420190510: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000350940994351928, 'time_algorithm_update': 0.01752982641521253, 'temp_loss': 0.6328238557196326, 'temp': 0.849231419507523, 'alpha_loss': 4.925322267047146, 'alpha': 0.49496835339487644, 'critic_loss': 420.9755713925724, 'actor_loss': 87.47688059221234, 'time_step': 0.017976444367079708, 'td_error': 59.504541487356306, 'init_value': -133.0072021484375, 'ave_value': -78.11346855222239} step=11628
2022-04-20 19:08.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.50 [info     ] CQL_20220420190510: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003511870813648603, 'time_algorithm_update': 0.01760998455404538, 'temp_loss': 0.5875316885523281, 'temp': 0.8453101641253421, 'alpha_loss': 4.824943188338252, 'alpha': 0.47933975942650736, 'critic_loss': 428.6024727626154, 'actor_loss': 88.06993790119014, 'time_step': 0.01805816616928368, 'td_error': 58.961572564918725, 'init_value': -132.97706604003906, 'ave_value': -78.30887708149247} step=11970
2022-04-20 19:08.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:08.57 [info     ] CQL_20220420190510: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003486014249032004, 'time_algorithm_update': 0.017545871567307858, 'temp_loss': 0.5813906784632314, 'temp': 0.8413846459653642, 'alpha_loss': 4.7161821796183, 'alpha': 0.46434087650469175, 'critic_loss': 436.64621114451984, 'actor_loss': 88.67963529887952, 'time_step': 0.017993749233714322, 'td_error': 59.37443216232793, 'init_value': -135.00265502929688, 'ave_value': -80.16968232377826} step=12312
2022-04-20 19:08.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.03 [info     ] CQL_20220420190510: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003526022559718082, 'time_algorithm_update': 0.0177646171279818, 'temp_loss': 0.5568128015780658, 'temp': 0.8374280565314822, 'alpha_loss': 4.605936522372285, 'alpha': 0.44972338444656795, 'critic_loss': 445.0933490775482, 'actor_loss': 89.2096567209701, 'time_step': 0.018212390921966373, 'td_error': 59.276621436585486, 'init_value': -134.3358154296875, 'ave_value': -79.83725115167934} step=12654
2022-04-20 19:09.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.10 [info     ] CQL_20220420190510: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00034499168395996094, 'time_algorithm_update': 0.017799208735861972, 'temp_loss': 0.5403409892834766, 'temp': 0.8335481172765208, 'alpha_loss': 4.465167232424195, 'alpha': 0.43573392674936884, 'critic_loss': 452.4799291599564, 'actor_loss': 89.71568965354162, 'time_step': 0.01824239262363367, 'td_error': 59.05566433586224, 'init_value': -134.6280517578125, 'ave_value': -80.5661830607909} step=12996
2022-04-20 19:09.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.16 [info     ] CQL_20220420190510: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00034651839942262885, 'time_algorithm_update': 0.017643280893738508, 'temp_loss': 0.5281164576435646, 'temp': 0.8295357272638912, 'alpha_loss': 4.351706453234131, 'alpha': 0.42224922018092975, 'critic_loss': 461.5646867361682, 'actor_loss': 90.29646847819724, 'time_step': 0.018083676260117202, 'td_error': 56.31320613019018, 'init_value': -136.55020141601562, 'ave_value': -80.89450766886543} step=13338
2022-04-20 19:09.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.22 [info     ] CQL_20220420190510: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003448076415480229, 'time_algorithm_update': 0.017687021640309115, 'temp_loss': 0.48494844218138716, 'temp': 0.8256486746651387, 'alpha_loss': 4.220394174955045, 'alpha': 0.4091937966339769, 'critic_loss': 467.84428735922654, 'actor_loss': 90.65914912530553, 'time_step': 0.018129856962906688, 'td_error': 56.5977240305754, 'init_value': -135.430419921875, 'ave_value': -81.13937653658962} step=13680
2022-04-20 19:09.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.29 [info     ] CQL_20220420190510: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003492295393469738, 'time_algorithm_update': 0.017587997759991918, 'temp_loss': 0.5099476401786707, 'temp': 0.8218373316421843, 'alpha_loss': 4.081833080938685, 'alpha': 0.3966359174565265, 'critic_loss': 474.9780085156535, 'actor_loss': 91.14532274391219, 'time_step': 0.018030325571695965, 'td_error': 55.22475369970746, 'init_value': -134.49496459960938, 'ave_value': -80.67508330259774} step=14022
2022-04-20 19:09.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.35 [info     ] CQL_20220420190510: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003456135242306001, 'time_algorithm_update': 0.017541883284585516, 'temp_loss': 0.4792485894275862, 'temp': 0.8177576948676193, 'alpha_loss': 3.969086675616036, 'alpha': 0.38459993175595825, 'critic_loss': 481.9171512893766, 'actor_loss': 91.5096736261022, 'time_step': 0.017981977490653767, 'td_error': 57.07339755794557, 'init_value': -134.9355010986328, 'ave_value': -81.0764395222249} step=14364
2022-04-20 19:09.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.42 [info     ] CQL_20220420190510: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000348109948007684, 'time_algorithm_update': 0.01762169843528703, 'temp_loss': 0.48484370099347934, 'temp': 0.8139198758797339, 'alpha_loss': 3.833353929700907, 'alpha': 0.37284614494320945, 'critic_loss': 486.42854862324674, 'actor_loss': 91.88240086962605, 'time_step': 0.018064511449713456, 'td_error': 55.627576915195895, 'init_value': -137.89059448242188, 'ave_value': -82.79702363781362} step=14706
2022-04-20 19:09.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.48 [info     ] CQL_20220420190510: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003445190295838473, 'time_algorithm_update': 0.017514470725031626, 'temp_loss': 0.5198653832796896, 'temp': 0.8097723039270145, 'alpha_loss': 3.6774829904935515, 'alpha': 0.36157759780075116, 'critic_loss': 493.04776384677103, 'actor_loss': 92.33686797381841, 'time_step': 0.017955602261058072, 'td_error': 56.417715767351865, 'init_value': -137.84303283691406, 'ave_value': -83.11443418926014} step=15048
2022-04-20 19:09.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:09.54 [info     ] CQL_20220420190510: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003489158306902612, 'time_algorithm_update': 0.017618686832182588, 'temp_loss': 0.47256386811621703, 'temp': 0.8056720312227282, 'alpha_loss': 3.582489832800034, 'alpha': 0.35067156253502385, 'critic_loss': 496.02952566760325, 'actor_loss': 92.50509962561534, 'time_step': 0.018064303704869677, 'td_error': 54.768678077647444, 'init_value': -138.28672790527344, 'ave_value': -83.25067092871143} step=15390
2022-04-20 19:09.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:10.01 [info     ] CQL_20220420190510: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003459955516614412, 'time_algorithm_update': 0.017454841680694045, 'temp_loss': 0.4668147155895219, 'temp': 0.801688857071581, 'alpha_loss': 3.4461852082034996, 'alpha': 0.3401345761373029, 'critic_loss': 499.6604728475649, 'actor_loss': 92.88789858455546, 'time_step': 0.017895552847120497, 'td_error': 53.89002684540186, 'init_value': -138.5261993408203, 'ave_value': -83.50257998073356} step=15732
2022-04-20 19:10.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:10.07 [info     ] CQL_20220420190510: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003507283695957117, 'time_algorithm_update': 0.01755276270080031, 'temp_loss': 0.4818153933605604, 'temp': 0.7976531945822531, 'alpha_loss': 3.338655859406231, 'alpha': 0.3299576035368512, 'critic_loss': 503.75694622909816, 'actor_loss': 93.09242527387296, 'time_step': 0.017998576164245605, 'td_error': 56.560272063148325, 'init_value': -138.21426391601562, 'ave_value': -83.41434543943049} step=16074
2022-04-20 19:10.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:10.14 [info     ] CQL_20220420190510: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003481448045250965, 'time_algorithm_update': 0.01730125201375861, 'temp_loss': 0.4428100282794725, 'temp': 0.7936758981113545, 'alpha_loss': 3.2359755596919366, 'alpha': 0.3200750583619402, 'critic_loss': 506.08228699087397, 'actor_loss': 93.30730348999737, 'time_step': 0.017746917685570074, 'td_error': 54.211378809858125, 'init_value': -140.90231323242188, 'ave_value': -84.95098900790754} step=16416
2022-04-20 19:10.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:10.20 [info     ] CQL_20220420190510: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00034739260087933454, 'time_algorithm_update': 0.017402440483807124, 'temp_loss': 0.4369984842640789, 'temp': 0.7899563760436766, 'alpha_loss': 3.126950796584637, 'alpha': 0.31037376707757425, 'critic_loss': 507.90191721776773, 'actor_loss': 93.52422533537212, 'time_step': 0.017843412376983822, 'td_error': 52.866923414213, 'init_value': -135.2716522216797, 'ave_value': -83.1033484666969} step=16758
2022-04-20 19:10.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:10.26 [info     ] CQL_20220420190510: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003455981873629386, 'time_algorithm_update': 0.017300469833507873, 'temp_loss': 0.47207725808917606, 'temp': 0.7858336060716394, 'alpha_loss': 2.9867641141540124, 'alpha': 0.3012135870624007, 'critic_loss': 509.7000968888489, 'actor_loss': 93.57224356221874, 'time_step': 0.01774298308188455, 'td_error': 54.22309504651221, 'init_value': -137.96426391601562, 'ave_value': -84.1967923762307} step=17100
2022-04-20 19:10.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420190510/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:10.27 [info     ] FQE_20220420191026: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001382870846484081, 'time_algorithm_update': 0.0020054262804697796, 'loss': 0.00510199873775513, 'time_step': 0.00220874562320939, 'init_value': 0.09133733808994293, 'ave_value': 0.11731058309895095, 'soft_opc': nan} step=166




2022-04-20 19:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.27 [info     ] FQE_20220420191026: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001404141805258142, 'time_algorithm_update': 0.001998064029647643, 'loss': 0.003769236608374729, 'time_step': 0.0022046077682311275, 'init_value': -0.045721203088760376, 'ave_value': 0.0237227085012909, 'soft_opc': nan} step=332




2022-04-20 19:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.28 [info     ] FQE_20220420191026: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013968168971050218, 'time_algorithm_update': 0.0019509777965315853, 'loss': 0.0033551257437498994, 'time_step': 0.002158022788633783, 'init_value': -0.12453034520149231, 'ave_value': -0.022905632050137396, 'soft_opc': nan} step=498




2022-04-20 19:10.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.28 [info     ] FQE_20220420191026: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00013792802052325513, 'time_algorithm_update': 0.00196664735495326, 'loss': 0.003436673131732113, 'time_step': 0.002168580710169781, 'init_value': -0.2173224687576294, 'ave_value': -0.07376366987413316, 'soft_opc': nan} step=664




2022-04-20 19:10.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.28 [info     ] FQE_20220420191026: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00013862029615655002, 'time_algorithm_update': 0.001936274838734822, 'loss': 0.0033577906342324272, 'time_step': 0.002139786639845515, 'init_value': -0.31582167744636536, 'ave_value': -0.13254031305377548, 'soft_opc': nan} step=830




2022-04-20 19:10.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.29 [info     ] FQE_20220420191026: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00013537722897816854, 'time_algorithm_update': 0.0019077895635581878, 'loss': 0.0031702098652653127, 'time_step': 0.002108980374163892, 'init_value': -0.36064428091049194, 'ave_value': -0.15454809051048984, 'soft_opc': nan} step=996




2022-04-20 19:10.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.29 [info     ] FQE_20220420191026: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014002926378365023, 'time_algorithm_update': 0.00202709508229451, 'loss': 0.0031411020381162955, 'time_step': 0.002228311745517225, 'init_value': -0.45723700523376465, 'ave_value': -0.21390005094292866, 'soft_opc': nan} step=1162




2022-04-20 19:10.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.30 [info     ] FQE_20220420191026: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001387509954981057, 'time_algorithm_update': 0.002001884471939271, 'loss': 0.003043759712410799, 'time_step': 0.002207496080053858, 'init_value': -0.5694290399551392, 'ave_value': -0.28457022113180175, 'soft_opc': nan} step=1328




2022-04-20 19:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.30 [info     ] FQE_20220420191026: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013788206031523556, 'time_algorithm_update': 0.0019766781703535333, 'loss': 0.0030358224281064987, 'time_step': 0.002177999680300793, 'init_value': -0.595816969871521, 'ave_value': -0.2820319523762173, 'soft_opc': nan} step=1494




2022-04-20 19:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.30 [info     ] FQE_20220420191026: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014111220118511155, 'time_algorithm_update': 0.0019523020250251494, 'loss': 0.0030875563865572393, 'time_step': 0.0021596213421189643, 'init_value': -0.684251070022583, 'ave_value': -0.33538157954887565, 'soft_opc': nan} step=1660




2022-04-20 19:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.31 [info     ] FQE_20220420191026: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00013775710599968233, 'time_algorithm_update': 0.001939032451215997, 'loss': 0.00306087682522968, 'time_step': 0.002145768648170563, 'init_value': -0.7675318121910095, 'ave_value': -0.38048935424502905, 'soft_opc': nan} step=1826




2022-04-20 19:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.31 [info     ] FQE_20220420191026: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014297358960990445, 'time_algorithm_update': 0.0020666021898568393, 'loss': 0.0030969934852187893, 'time_step': 0.002276085945497076, 'init_value': -0.8591463565826416, 'ave_value': -0.43652032957756304, 'soft_opc': nan} step=1992




2022-04-20 19:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.32 [info     ] FQE_20220420191026: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014000771993614105, 'time_algorithm_update': 0.0019890400300542994, 'loss': 0.0031076470643831753, 'time_step': 0.0021986487400100893, 'init_value': -0.9935568571090698, 'ave_value': -0.5283134386883126, 'soft_opc': nan} step=2158




2022-04-20 19:10.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.32 [info     ] FQE_20220420191026: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001396615821194936, 'time_algorithm_update': 0.002007026270211461, 'loss': 0.0030176187248294613, 'time_step': 0.0022139333816896, 'init_value': -1.1006536483764648, 'ave_value': -0.6019191285326808, 'soft_opc': nan} step=2324




2022-04-20 19:10.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.32 [info     ] FQE_20220420191026: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00013990000069859517, 'time_algorithm_update': 0.0019919182880815253, 'loss': 0.0033317927834970586, 'time_step': 0.0022013374121792346, 'init_value': -1.1713988780975342, 'ave_value': -0.655398453859327, 'soft_opc': nan} step=2490




2022-04-20 19:10.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.33 [info     ] FQE_20220420191026: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00013749570731657097, 'time_algorithm_update': 0.0019306231694049146, 'loss': 0.0035217417907586926, 'time_step': 0.002134693674294345, 'init_value': -1.2222845554351807, 'ave_value': -0.6652006519136128, 'soft_opc': nan} step=2656




2022-04-20 19:10.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.33 [info     ] FQE_20220420191026: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014476172895316617, 'time_algorithm_update': 0.0020586309662784436, 'loss': 0.0034788339367242955, 'time_step': 0.002269767853150885, 'init_value': -1.3322665691375732, 'ave_value': -0.7420912414453588, 'soft_opc': nan} step=2822




2022-04-20 19:10.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.34 [info     ] FQE_20220420191026: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014253122260771603, 'time_algorithm_update': 0.0019829445574657024, 'loss': 0.0035210944465305432, 'time_step': 0.002193667802465967, 'init_value': -1.4270501136779785, 'ave_value': -0.7999378745888804, 'soft_opc': nan} step=2988




2022-04-20 19:10.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.34 [info     ] FQE_20220420191026: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014283570898584574, 'time_algorithm_update': 0.002028381968119058, 'loss': 0.003940889692489417, 'time_step': 0.002238168773880924, 'init_value': -1.5538725852966309, 'ave_value': -0.8787615478978501, 'soft_opc': nan} step=3154




2022-04-20 19:10.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.35 [info     ] FQE_20220420191026: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001407301569559488, 'time_algorithm_update': 0.0019511185496686452, 'loss': 0.004016229863200022, 'time_step': 0.002153827483395496, 'init_value': -1.6242091655731201, 'ave_value': -0.9187936520757707, 'soft_opc': nan} step=3320




2022-04-20 19:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.35 [info     ] FQE_20220420191026: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014376353068524096, 'time_algorithm_update': 0.0019849897867225737, 'loss': 0.004198334159481014, 'time_step': 0.002199962914708149, 'init_value': -1.6881859302520752, 'ave_value': -0.9670742959656694, 'soft_opc': nan} step=3486




2022-04-20 19:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.35 [info     ] FQE_20220420191026: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014033662267478117, 'time_algorithm_update': 0.0019492083285228316, 'loss': 0.0045239154841324476, 'time_step': 0.0021575516965015827, 'init_value': -1.8127427101135254, 'ave_value': -1.063801915983896, 'soft_opc': nan} step=3652




2022-04-20 19:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.36 [info     ] FQE_20220420191026: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.000142723680978798, 'time_algorithm_update': 0.0020178800605865845, 'loss': 0.0047323183627821595, 'time_step': 0.002226151615740305, 'init_value': -1.8957605361938477, 'ave_value': -1.0963937120965204, 'soft_opc': nan} step=3818




2022-04-20 19:10.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.36 [info     ] FQE_20220420191026: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014443282621452608, 'time_algorithm_update': 0.0019888389541442134, 'loss': 0.004938031617251887, 'time_step': 0.002205087477902332, 'init_value': -1.9867535829544067, 'ave_value': -1.1312608149808807, 'soft_opc': nan} step=3984




2022-04-20 19:10.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.37 [info     ] FQE_20220420191026: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014020305082022426, 'time_algorithm_update': 0.0019797101078263247, 'loss': 0.00535475591854024, 'time_step': 0.002185654927449054, 'init_value': -2.052436113357544, 'ave_value': -1.149065076643685, 'soft_opc': nan} step=4150




2022-04-20 19:10.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.37 [info     ] FQE_20220420191026: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014245940978268543, 'time_algorithm_update': 0.0019774077886558442, 'loss': 0.005533552415464185, 'time_step': 0.002185140747621835, 'init_value': -2.1765031814575195, 'ave_value': -1.2323390402134742, 'soft_opc': nan} step=4316




2022-04-20 19:10.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.37 [info     ] FQE_20220420191026: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001388946211481669, 'time_algorithm_update': 0.001958538250750806, 'loss': 0.005926799360474751, 'time_step': 0.002165637820600027, 'init_value': -2.220822334289551, 'ave_value': -1.2792097837001353, 'soft_opc': nan} step=4482




2022-04-20 19:10.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.38 [info     ] FQE_20220420191026: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014000053865363798, 'time_algorithm_update': 0.0019002305455954678, 'loss': 0.0062123236729600744, 'time_step': 0.002106785774230957, 'init_value': -2.278238296508789, 'ave_value': -1.3006383424406653, 'soft_opc': nan} step=4648




2022-04-20 19:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.38 [info     ] FQE_20220420191026: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001353973365691771, 'time_algorithm_update': 0.0019694365650774486, 'loss': 0.006194849709660399, 'time_step': 0.002170895955648767, 'init_value': -2.3526458740234375, 'ave_value': -1.3429829972905216, 'soft_opc': nan} step=4814




2022-04-20 19:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.39 [info     ] FQE_20220420191026: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014125008180917027, 'time_algorithm_update': 0.001966497984277197, 'loss': 0.006874834425769836, 'time_step': 0.002174173492983163, 'init_value': -2.461383104324341, 'ave_value': -1.4294105994070436, 'soft_opc': nan} step=4980




2022-04-20 19:10.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.39 [info     ] FQE_20220420191026: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014139514371573207, 'time_algorithm_update': 0.0019594919250672123, 'loss': 0.007043951643695381, 'time_step': 0.0021676729960613942, 'init_value': -2.5682806968688965, 'ave_value': -1.5001685814948769, 'soft_opc': nan} step=5146




2022-04-20 19:10.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.39 [info     ] FQE_20220420191026: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00013764076922313277, 'time_algorithm_update': 0.001958812575742423, 'loss': 0.0073890402567354085, 'time_step': 0.0021623631557786322, 'init_value': -2.702366352081299, 'ave_value': -1.5923958692800355, 'soft_opc': nan} step=5312




2022-04-20 19:10.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.40 [info     ] FQE_20220420191026: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001407904797289745, 'time_algorithm_update': 0.0019709159092730784, 'loss': 0.007262872292166464, 'time_step': 0.002182446330426687, 'init_value': -2.739428758621216, 'ave_value': -1.610233200938852, 'soft_opc': nan} step=5478




2022-04-20 19:10.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.40 [info     ] FQE_20220420191026: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001400737877351692, 'time_algorithm_update': 0.001945432410182723, 'loss': 0.007951111710877213, 'time_step': 0.002150312963738499, 'init_value': -2.783389091491699, 'ave_value': -1.6412989239334255, 'soft_opc': nan} step=5644




2022-04-20 19:10.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.41 [info     ] FQE_20220420191026: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001389448901256883, 'time_algorithm_update': 0.00193180951727442, 'loss': 0.008456179584974018, 'time_step': 0.0021330879395266614, 'init_value': -2.898831605911255, 'ave_value': -1.7342015536854396, 'soft_opc': nan} step=5810




2022-04-20 19:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.41 [info     ] FQE_20220420191026: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001441441386579031, 'time_algorithm_update': 0.001986650099237281, 'loss': 0.008677502376299509, 'time_step': 0.00219747675470559, 'init_value': -2.920293092727661, 'ave_value': -1.697052631882934, 'soft_opc': nan} step=5976




2022-04-20 19:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.41 [info     ] FQE_20220420191026: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014239190572715667, 'time_algorithm_update': 0.0019768964813416264, 'loss': 0.009277066375417859, 'time_step': 0.002186946122043104, 'init_value': -3.0063586235046387, 'ave_value': -1.7822892084801654, 'soft_opc': nan} step=6142




2022-04-20 19:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.42 [info     ] FQE_20220420191026: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014076462711196347, 'time_algorithm_update': 0.0019686925842101314, 'loss': 0.009627126548607576, 'time_step': 0.002180089433509183, 'init_value': -3.095846176147461, 'ave_value': -1.8819602878311197, 'soft_opc': nan} step=6308




2022-04-20 19:10.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.42 [info     ] FQE_20220420191026: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013966301837599422, 'time_algorithm_update': 0.0019912073411137224, 'loss': 0.010278842966661351, 'time_step': 0.0021993955933904074, 'init_value': -3.1763510704040527, 'ave_value': -1.9093087764876382, 'soft_opc': nan} step=6474




2022-04-20 19:10.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.43 [info     ] FQE_20220420191026: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001425987266632448, 'time_algorithm_update': 0.0019442503710827195, 'loss': 0.01080412989207375, 'time_step': 0.002152409898229392, 'init_value': -3.243194580078125, 'ave_value': -1.9441914265883062, 'soft_opc': nan} step=6640




2022-04-20 19:10.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.43 [info     ] FQE_20220420191026: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014265186815376742, 'time_algorithm_update': 0.002070303422858916, 'loss': 0.010809296148082981, 'time_step': 0.002282646765191871, 'init_value': -3.3235535621643066, 'ave_value': -1.9904885849109977, 'soft_opc': nan} step=6806




2022-04-20 19:10.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.43 [info     ] FQE_20220420191026: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001395165202129318, 'time_algorithm_update': 0.0019102441259177334, 'loss': 0.011469565428364236, 'time_step': 0.0021135620324008435, 'init_value': -3.403794288635254, 'ave_value': -2.0390487333805813, 'soft_opc': nan} step=6972




2022-04-20 19:10.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.44 [info     ] FQE_20220420191026: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014070286808243716, 'time_algorithm_update': 0.002004999712289098, 'loss': 0.011996774376701025, 'time_step': 0.002211066613714379, 'init_value': -3.4757509231567383, 'ave_value': -2.076727058652889, 'soft_opc': nan} step=7138




2022-04-20 19:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.44 [info     ] FQE_20220420191026: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014037396534379707, 'time_algorithm_update': 0.0019861833158745824, 'loss': 0.0122487980252446, 'time_step': 0.002192240163504359, 'init_value': -3.5827414989471436, 'ave_value': -2.14784155652422, 'soft_opc': nan} step=7304




2022-04-20 19:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.45 [info     ] FQE_20220420191026: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001414468489497541, 'time_algorithm_update': 0.0019911125481846823, 'loss': 0.013114902460863477, 'time_step': 0.0021969453398003637, 'init_value': -3.60491681098938, 'ave_value': -2.1457264580235287, 'soft_opc': nan} step=7470




2022-04-20 19:10.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.45 [info     ] FQE_20220420191026: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00013934847820236022, 'time_algorithm_update': 0.0019297642880175487, 'loss': 0.013252130559510944, 'time_step': 0.0021337759063904545, 'init_value': -3.711397171020508, 'ave_value': -2.215200511884649, 'soft_opc': nan} step=7636




2022-04-20 19:10.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.45 [info     ] FQE_20220420191026: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014106767723359257, 'time_algorithm_update': 0.0019838580166000918, 'loss': 0.013225024353561318, 'time_step': 0.0021916584796216115, 'init_value': -3.7621514797210693, 'ave_value': -2.2548817219145527, 'soft_opc': nan} step=7802




2022-04-20 19:10.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.46 [info     ] FQE_20220420191026: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00013987414808158414, 'time_algorithm_update': 0.0020003060260450982, 'loss': 0.014355509956395366, 'time_step': 0.0022031384778310016, 'init_value': -3.8271851539611816, 'ave_value': -2.2955806879372846, 'soft_opc': nan} step=7968




2022-04-20 19:10.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.46 [info     ] FQE_20220420191026: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014232727418462914, 'time_algorithm_update': 0.001958294087145702, 'loss': 0.014674261999905604, 'time_step': 0.0021680478590080537, 'init_value': -3.9392967224121094, 'ave_value': -2.3792780805815448, 'soft_opc': nan} step=8134




2022-04-20 19:10.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:10.47 [info     ] FQE_20220420191026: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001398540404905756, 'time_algorithm_update': 0.0019418489502136965, 'loss': 0.015332187834498754, 'time_step': 0.0021473801279642494, 'init_value': -3.990950107574463, 'ave_value': -2.4173750969866576, 'soft_opc': nan} step=8300




2022-04-20 19:10.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191026/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:10.47 [debug    ] RoundIterator is selected.
2022-04-20 19:10.47 [info     ] Directory is created at d3rlpy_logs/FQE_20220420191047
2022-04-20 19:10.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:10.47 [debug    ] Building models...
2022-04-20 19:10.47 [debug    ] Models have been built.
2022-04-20 19:10.47 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420191047/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:10.48 [info     ] FQE_20220420191047: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00014415799185287122, 'time_algorithm_update': 0.001958257930223332, 'loss': 0.02180650609350482, 'time_step': 0.0021709564120270487, 'init_value': -1.1086344718933105, 'ave_value': -1.1096332210543993, 'soft_opc': nan} step=344




2022-04-20 19:10.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.49 [info     ] FQE_20220420191047: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014668911002403082, 'time_algorithm_update': 0.001995176769966303, 'loss': 0.022046299913876452, 'time_step': 0.002207837132520454, 'init_value': -1.9578285217285156, 'ave_value': -1.9505241345983368, 'soft_opc': nan} step=688




2022-04-20 19:10.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.50 [info     ] FQE_20220420191047: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00014884527339491734, 'time_algorithm_update': 0.0020260665305825167, 'loss': 0.026155443520956608, 'time_step': 0.0022375223248503927, 'init_value': -2.8728437423706055, 'ave_value': -2.8512402641746375, 'soft_opc': nan} step=1032




2022-04-20 19:10.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.51 [info     ] FQE_20220420191047: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001499625139458235, 'time_algorithm_update': 0.002020912114963975, 'loss': 0.027991568046990177, 'time_step': 0.0022352795268214026, 'init_value': -3.626901626586914, 'ave_value': -3.59970752689484, 'soft_opc': nan} step=1376




2022-04-20 19:10.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.52 [info     ] FQE_20220420191047: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00014762337817702184, 'time_algorithm_update': 0.0019974375880041787, 'loss': 0.03333700455313678, 'time_step': 0.0022133796714073005, 'init_value': -4.481167316436768, 'ave_value': -4.453655178375072, 'soft_opc': nan} step=1720




2022-04-20 19:10.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.52 [info     ] FQE_20220420191047: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015028895333755847, 'time_algorithm_update': 0.001990407012229742, 'loss': 0.038810439139799494, 'time_step': 0.0022085392197897266, 'init_value': -5.020827293395996, 'ave_value': -4.991512770709154, 'soft_opc': nan} step=2064




2022-04-20 19:10.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.53 [info     ] FQE_20220420191047: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00014938864597054415, 'time_algorithm_update': 0.00201611879260041, 'loss': 0.046398804374147465, 'time_step': 0.002233182275018027, 'init_value': -5.745149612426758, 'ave_value': -5.7174805951682295, 'soft_opc': nan} step=2408




2022-04-20 19:10.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.54 [info     ] FQE_20220420191047: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00014995419701864553, 'time_algorithm_update': 0.001995053402213163, 'loss': 0.057794744947513695, 'time_step': 0.0022147089936012444, 'init_value': -6.200779438018799, 'ave_value': -6.204946321557771, 'soft_opc': nan} step=2752




2022-04-20 19:10.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.55 [info     ] FQE_20220420191047: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00014966241149015204, 'time_algorithm_update': 0.0020207194394843524, 'loss': 0.06674750677450712, 'time_step': 0.002239766509033913, 'init_value': -6.693150520324707, 'ave_value': -6.732877188568582, 'soft_opc': nan} step=3096




2022-04-20 19:10.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.56 [info     ] FQE_20220420191047: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001518885756647864, 'time_algorithm_update': 0.0020338754321253577, 'loss': 0.08107002633677926, 'time_step': 0.00225622154945551, 'init_value': -7.344215393066406, 'ave_value': -7.465162204276468, 'soft_opc': nan} step=3440




2022-04-20 19:10.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.57 [info     ] FQE_20220420191047: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015041786570881688, 'time_algorithm_update': 0.002006636109463004, 'loss': 0.09312351331450479, 'time_step': 0.002225877933723982, 'init_value': -7.810398578643799, 'ave_value': -8.063357591537574, 'soft_opc': nan} step=3784




2022-04-20 19:10.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.57 [info     ] FQE_20220420191047: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001496132030043491, 'time_algorithm_update': 0.0019864315210386765, 'loss': 0.11214104480499965, 'time_step': 0.002203091632488162, 'init_value': -8.231203079223633, 'ave_value': -8.65451277319325, 'soft_opc': nan} step=4128




2022-04-20 19:10.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.58 [info     ] FQE_20220420191047: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00014763238818146462, 'time_algorithm_update': 0.0019626166931418485, 'loss': 0.12533203207584487, 'time_step': 0.0021753713142040163, 'init_value': -8.457798957824707, 'ave_value': -9.110058714993096, 'soft_opc': nan} step=4472




2022-04-20 19:10.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:10.59 [info     ] FQE_20220420191047: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001519641210866529, 'time_algorithm_update': 0.0020373803238536038, 'loss': 0.1405107754304312, 'time_step': 0.0022588420745938325, 'init_value': -9.168155670166016, 'ave_value': -10.092346358785893, 'soft_opc': nan} step=4816




2022-04-20 19:10.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.00 [info     ] FQE_20220420191047: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001507470774096112, 'time_algorithm_update': 0.002018334560616072, 'loss': 0.1594990538649781, 'time_step': 0.0022379437158274096, 'init_value': -9.520654678344727, 'ave_value': -10.740585687004769, 'soft_opc': nan} step=5160




2022-04-20 19:11.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.01 [info     ] FQE_20220420191047: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00014801704606344534, 'time_algorithm_update': 0.001966601194337357, 'loss': 0.1829159213392454, 'time_step': 0.002183035362598508, 'init_value': -10.283147811889648, 'ave_value': -11.758964738554347, 'soft_opc': nan} step=5504




2022-04-20 19:11.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.02 [info     ] FQE_20220420191047: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00014831437621005747, 'time_algorithm_update': 0.0019785276679105536, 'loss': 0.21224379226522044, 'time_step': 0.0021934731062068498, 'init_value': -10.499954223632812, 'ave_value': -12.211973735015537, 'soft_opc': nan} step=5848




2022-04-20 19:11.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.02 [info     ] FQE_20220420191047: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00014864636021991108, 'time_algorithm_update': 0.001978637174118397, 'loss': 0.23548498215305425, 'time_step': 0.002196817204009655, 'init_value': -10.984346389770508, 'ave_value': -12.90981542691085, 'soft_opc': nan} step=6192




2022-04-20 19:11.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.03 [info     ] FQE_20220420191047: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001515226308689561, 'time_algorithm_update': 0.002025159292442854, 'loss': 0.26044305983584287, 'time_step': 0.0022478817507278087, 'init_value': -11.472135543823242, 'ave_value': -13.608862043364258, 'soft_opc': nan} step=6536




2022-04-20 19:11.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.04 [info     ] FQE_20220420191047: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00014795258987781613, 'time_algorithm_update': 0.001940273268278255, 'loss': 0.292406581282659, 'time_step': 0.002155325440473335, 'init_value': -11.548870086669922, 'ave_value': -13.983587577573216, 'soft_opc': nan} step=6880




2022-04-20 19:11.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.05 [info     ] FQE_20220420191047: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00014789575754210006, 'time_algorithm_update': 0.0019924335701521053, 'loss': 0.3223339607018631, 'time_step': 0.0022093431894169295, 'init_value': -12.597471237182617, 'ave_value': -15.186825858462635, 'soft_opc': nan} step=7224




2022-04-20 19:11.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.06 [info     ] FQE_20220420191047: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00014777862748434378, 'time_algorithm_update': 0.0019779212253038274, 'loss': 0.35346272982505345, 'time_step': 0.00219277725663296, 'init_value': -13.10120677947998, 'ave_value': -15.825760417686658, 'soft_opc': nan} step=7568




2022-04-20 19:11.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.07 [info     ] FQE_20220420191047: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00014888616495354232, 'time_algorithm_update': 0.0020085725673409396, 'loss': 0.38290858976434655, 'time_step': 0.002226884281912515, 'init_value': -13.277412414550781, 'ave_value': -16.20572338217685, 'soft_opc': nan} step=7912




2022-04-20 19:11.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.07 [info     ] FQE_20220420191047: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00014935953672542127, 'time_algorithm_update': 0.0019846572432407113, 'loss': 0.42042009075995274, 'time_step': 0.0022020492442818575, 'init_value': -13.893298149108887, 'ave_value': -16.9805994773292, 'soft_opc': nan} step=8256




2022-04-20 19:11.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.08 [info     ] FQE_20220420191047: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015136946079342864, 'time_algorithm_update': 0.0020005231679872024, 'loss': 0.44751132922896814, 'time_step': 0.002221088076746741, 'init_value': -14.308304786682129, 'ave_value': -17.589300271708453, 'soft_opc': nan} step=8600




2022-04-20 19:11.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.09 [info     ] FQE_20220420191047: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001491918120273324, 'time_algorithm_update': 0.0020380824111228767, 'loss': 0.4760032490783826, 'time_step': 0.0022582183050554854, 'init_value': -14.741198539733887, 'ave_value': -18.276409014373986, 'soft_opc': nan} step=8944




2022-04-20 19:11.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.10 [info     ] FQE_20220420191047: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001500068708907726, 'time_algorithm_update': 0.0020315293655839076, 'loss': 0.5095090068075373, 'time_step': 0.0022501342518385066, 'init_value': -14.861444473266602, 'ave_value': -18.63383099617403, 'soft_opc': nan} step=9288




2022-04-20 19:11.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.11 [info     ] FQE_20220420191047: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00014590731886930243, 'time_algorithm_update': 0.0019521290479704392, 'loss': 0.5325555975527264, 'time_step': 0.0021653549615726912, 'init_value': -15.407941818237305, 'ave_value': -19.367328610351763, 'soft_opc': nan} step=9632




2022-04-20 19:11.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.12 [info     ] FQE_20220420191047: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00014699059863423192, 'time_algorithm_update': 0.0020014366438222487, 'loss': 0.5588160768952654, 'time_step': 0.002217496550360391, 'init_value': -15.443906784057617, 'ave_value': -19.793368961764415, 'soft_opc': nan} step=9976




2022-04-20 19:11.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.12 [info     ] FQE_20220420191047: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00014886260032653809, 'time_algorithm_update': 0.0019926012948501943, 'loss': 0.591648684543839, 'time_step': 0.0022097112134445546, 'init_value': -15.2774658203125, 'ave_value': -19.694432773665937, 'soft_opc': nan} step=10320




2022-04-20 19:11.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.13 [info     ] FQE_20220420191047: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00014872814333716103, 'time_algorithm_update': 0.0020089780175408653, 'loss': 0.6077865348901427, 'time_step': 0.0022281359794527984, 'init_value': -15.733003616333008, 'ave_value': -20.333709056736684, 'soft_opc': nan} step=10664




2022-04-20 19:11.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.14 [info     ] FQE_20220420191047: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00014880576799082202, 'time_algorithm_update': 0.0019891324431397196, 'loss': 0.6207140985616418, 'time_step': 0.002205478590588237, 'init_value': -16.26473045349121, 'ave_value': -21.109378437223835, 'soft_opc': nan} step=11008




2022-04-20 19:11.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.15 [info     ] FQE_20220420191047: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001535034456918406, 'time_algorithm_update': 0.002070031193799751, 'loss': 0.6412748661498691, 'time_step': 0.002292168001795924, 'init_value': -16.696441650390625, 'ave_value': -21.805466746956714, 'soft_opc': nan} step=11352




2022-04-20 19:11.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.16 [info     ] FQE_20220420191047: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001490746819695761, 'time_algorithm_update': 0.002004678859267124, 'loss': 0.6552097674697465, 'time_step': 0.002223380776338799, 'init_value': -17.046951293945312, 'ave_value': -22.272190616982947, 'soft_opc': nan} step=11696




2022-04-20 19:11.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.17 [info     ] FQE_20220420191047: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00014985300773798034, 'time_algorithm_update': 0.0020334928534751716, 'loss': 0.6546016061126233, 'time_step': 0.0022516125856443894, 'init_value': -17.236684799194336, 'ave_value': -22.72892930974244, 'soft_opc': nan} step=12040




2022-04-20 19:11.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.17 [info     ] FQE_20220420191047: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00014738842498424442, 'time_algorithm_update': 0.001999227806579235, 'loss': 0.6766673258267516, 'time_step': 0.0022176725919856584, 'init_value': -17.473251342773438, 'ave_value': -23.04589960034433, 'soft_opc': nan} step=12384




2022-04-20 19:11.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.18 [info     ] FQE_20220420191047: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00014838299085927564, 'time_algorithm_update': 0.00198445763698844, 'loss': 0.6936974423764231, 'time_step': 0.0022024526152499887, 'init_value': -18.015647888183594, 'ave_value': -23.793008973848845, 'soft_opc': nan} step=12728




2022-04-20 19:11.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.19 [info     ] FQE_20220420191047: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00014878428259561228, 'time_algorithm_update': 0.0020218373731125234, 'loss': 0.7044397399240975, 'time_step': 0.0022413626659748167, 'init_value': -17.700456619262695, 'ave_value': -23.671489644451654, 'soft_opc': nan} step=13072




2022-04-20 19:11.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.20 [info     ] FQE_20220420191047: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015056895655254986, 'time_algorithm_update': 0.0020204893378324286, 'loss': 0.6965112131912001, 'time_step': 0.002240533052488815, 'init_value': -17.645090103149414, 'ave_value': -23.77136168381585, 'soft_opc': nan} step=13416




2022-04-20 19:11.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.21 [info     ] FQE_20220420191047: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001482256623201592, 'time_algorithm_update': 0.001980940962946692, 'loss': 0.6939139610243051, 'time_step': 0.0021978686022204024, 'init_value': -18.109344482421875, 'ave_value': -24.44704998330725, 'soft_opc': nan} step=13760




2022-04-20 19:11.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.22 [info     ] FQE_20220420191047: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00014904210733812908, 'time_algorithm_update': 0.00201473125191622, 'loss': 0.6948192804499508, 'time_step': 0.002232351275377495, 'init_value': -17.642623901367188, 'ave_value': -24.095844872842903, 'soft_opc': nan} step=14104




2022-04-20 19:11.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.22 [info     ] FQE_20220420191047: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00014966310456741687, 'time_algorithm_update': 0.0020124454830968102, 'loss': 0.6995812870865298, 'time_step': 0.0022329951441565224, 'init_value': -17.85253143310547, 'ave_value': -24.345709390043037, 'soft_opc': nan} step=14448




2022-04-20 19:11.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.23 [info     ] FQE_20220420191047: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015213045963021211, 'time_algorithm_update': 0.002026509406954743, 'loss': 0.7154795126177284, 'time_step': 0.0022485436395157216, 'init_value': -17.71955108642578, 'ave_value': -24.426145093720237, 'soft_opc': nan} step=14792




2022-04-20 19:11.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.24 [info     ] FQE_20220420191047: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001506923243056896, 'time_algorithm_update': 0.0020274228827897893, 'loss': 0.7279279431988767, 'time_step': 0.002247194911158362, 'init_value': -17.65401840209961, 'ave_value': -24.454532775539604, 'soft_opc': nan} step=15136




2022-04-20 19:11.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.25 [info     ] FQE_20220420191047: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015058073886605196, 'time_algorithm_update': 0.001980220162591269, 'loss': 0.737028350089785, 'time_step': 0.002200545266617176, 'init_value': -18.426551818847656, 'ave_value': -25.100407519953343, 'soft_opc': nan} step=15480




2022-04-20 19:11.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.26 [info     ] FQE_20220420191047: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015006301014922386, 'time_algorithm_update': 0.0020310116368670796, 'loss': 0.7400020680845131, 'time_step': 0.002251372780910758, 'init_value': -18.65552520751953, 'ave_value': -25.422086356644023, 'soft_opc': nan} step=15824




2022-04-20 19:11.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.27 [info     ] FQE_20220420191047: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00014868447946947674, 'time_algorithm_update': 0.0020276183305784714, 'loss': 0.7582842827460534, 'time_step': 0.002245191917863003, 'init_value': -18.82132339477539, 'ave_value': -25.584363139964438, 'soft_opc': nan} step=16168




2022-04-20 19:11.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.28 [info     ] FQE_20220420191047: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00014949607294659282, 'time_algorithm_update': 0.0020107689291931864, 'loss': 0.7536912885505359, 'time_step': 0.002228582321211349, 'init_value': -18.655900955200195, 'ave_value': -25.65643677434104, 'soft_opc': nan} step=16512




2022-04-20 19:11.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.28 [info     ] FQE_20220420191047: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00014605910279030023, 'time_algorithm_update': 0.0019764713076658026, 'loss': 0.7520851299794781, 'time_step': 0.0021927148796791252, 'init_value': -18.756000518798828, 'ave_value': -25.7473905021954, 'soft_opc': nan} step=16856




2022-04-20 19:11.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:11.29 [info     ] FQE_20220420191047: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015336067177528558, 'time_algorithm_update': 0.0019782178623731746, 'loss': 0.7556567845248812, 'time_step': 0.0022013471570125846, 'init_value': -19.208126068115234, 'ave_value': -26.34042938075339, 'soft_opc': nan} step=17200




2022-04-20 19:11.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191047/model_17200.pt
search iteration:  25
using hyper params:  [0.0037333174750318237, 0.00624477003468608, 5.7940196727116555e-05, 7]
2022-04-20 19:11.29 [debug    ] RoundIterator is selected.
2022-04-20 19:11.29 [info     ] Directory is created at d3rlpy_logs/CQL_20220420191129
2022-04-20 19:11.29 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:11.29 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:11.29 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420191129/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0037333174750318237, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:11.36 [info     ] CQL_20220420191129: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003669178276731257, 'time_algorithm_update': 0.017527677859479222, 'temp_loss': 4.531356686737105, 'temp': 0.99024993407796, 'alpha_loss': -16.24794980378179, 'alpha': 1.0166652934593068, 'critic_loss': 51.23024474249946, 'actor_loss': 5.687140195167553, 'time_step': 0.017992931499815824, 'td_error': 7.436087057080573, 'init_value': -15.056800842285156, 'ave_value': -9.410492239930072} step=342
2022-04-20 19:11.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:11.42 [info     ] CQL_20220420191129: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00036931944172284754, 'time_algorithm_update': 0.017225968210320724, 'temp_loss': 3.9398324029487477, 'temp': 0.9715070133669335, 'alpha_loss': -6.834781201959354, 'alpha': 1.0391804377237956, 'critic_loss': 30.288885283888433, 'actor_loss': 12.750267756612677, 'time_step': 0.017693947630318983, 'td_error': 6.195304073923458, 'init_value': -23.91306495666504, 'ave_value': -14.6025760989757} step=684
2022-04-20 19:11.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:11.48 [info     ] CQL_20220420191129: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003638581225746556, 'time_algorithm_update': 0.01738199225643225, 'temp_loss': 3.2004398546720805, 'temp': 0.9553361245763232, 'alpha_loss': -3.6560247475989383, 'alpha': 1.0545899251051116, 'critic_loss': 47.84473542999803, 'actor_loss': 20.394676596100567, 'time_step': 0.01784196304298981, 'td_error': 9.513020033739972, 'init_value': -35.25429153442383, 'ave_value': -21.526795459442713} step=1026
2022-04-20 19:11.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:11.54 [info     ] CQL_20220420191129: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00037301702108996653, 'time_algorithm_update': 0.017219760961699904, 'temp_loss': 2.710878144230759, 'temp': 0.9406222634845309, 'alpha_loss': -1.1578670717916746, 'alpha': 1.0638397893710443, 'critic_loss': 71.49774207845766, 'actor_loss': 27.87576936978346, 'time_step': 0.017692758325944868, 'td_error': 13.913429494163132, 'init_value': -45.97746658325195, 'ave_value': -27.979819788911993} step=1368
2022-04-20 19:11.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.01 [info     ] CQL_20220420191129: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003666340956213879, 'time_algorithm_update': 0.017340483721236737, 'temp_loss': 2.285007433584559, 'temp': 0.9269956688434757, 'alpha_loss': 1.077241412555419, 'alpha': 1.0638856811133044, 'critic_loss': 97.82333400793243, 'actor_loss': 34.80643967857138, 'time_step': 0.01780380003633555, 'td_error': 19.22378904714973, 'init_value': -56.2515983581543, 'ave_value': -33.58808777417014} step=1710
2022-04-20 19:12.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.07 [info     ] CQL_20220420191129: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003714408093725729, 'time_algorithm_update': 0.01764307245176438, 'temp_loss': 1.9105930875616464, 'temp': 0.9144806666680944, 'alpha_loss': 3.14331389702203, 'alpha': 1.0504520490155582, 'critic_loss': 127.21739558169716, 'actor_loss': 41.18878067864312, 'time_step': 0.018108715090835302, 'td_error': 23.086855621947727, 'init_value': -63.653533935546875, 'ave_value': -38.205590912092916} step=2052
2022-04-20 19:12.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.14 [info     ] CQL_20220420191129: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003690043388054385, 'time_algorithm_update': 0.017702961525721858, 'temp_loss': 1.5676714654205834, 'temp': 0.9029577505170253, 'alpha_loss': 4.805816589043155, 'alpha': 1.0236886338880884, 'critic_loss': 156.52280100326092, 'actor_loss': 46.84262541163037, 'time_step': 0.018168024152343035, 'td_error': 29.466638004825743, 'init_value': -72.03045654296875, 'ave_value': -43.41600699430382} step=2394
2022-04-20 19:12.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.20 [info     ] CQL_20220420191129: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003713041718243158, 'time_algorithm_update': 0.017382772345291942, 'temp_loss': 1.2867910387048944, 'temp': 0.8924212633517751, 'alpha_loss': 6.382706838741637, 'alpha': 0.9865288096561766, 'critic_loss': 184.41638295134607, 'actor_loss': 52.07922049851445, 'time_step': 0.01785310179169415, 'td_error': 33.822806836510814, 'init_value': -78.95173645019531, 'ave_value': -46.791395660978445} step=2736
2022-04-20 19:12.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.26 [info     ] CQL_20220420191129: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00037004166876363474, 'time_algorithm_update': 0.017488923686289647, 'temp_loss': 1.067169356459414, 'temp': 0.8827182152117902, 'alpha_loss': 7.453959658829092, 'alpha': 0.94581786686914, 'critic_loss': 215.01569897389552, 'actor_loss': 56.8965612154955, 'time_step': 0.01795729210502223, 'td_error': 39.8182642349551, 'init_value': -85.4375991821289, 'ave_value': -51.311312634444185} step=3078
2022-04-20 19:12.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.33 [info     ] CQL_20220420191129: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00036998450407507824, 'time_algorithm_update': 0.01753107358140555, 'temp_loss': 0.9167113522862831, 'temp': 0.8736948247192896, 'alpha_loss': 7.961206238172208, 'alpha': 0.9062989876981367, 'critic_loss': 244.13908707468133, 'actor_loss': 61.10618906969216, 'time_step': 0.01799878042343764, 'td_error': 44.04148374426913, 'init_value': -89.19944763183594, 'ave_value': -54.15292003844504} step=3420
2022-04-20 19:12.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.39 [info     ] CQL_20220420191129: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003712860464352613, 'time_algorithm_update': 0.01731304815638135, 'temp_loss': 0.7361817606534177, 'temp': 0.865028944454695, 'alpha_loss': 8.308011073118065, 'alpha': 0.8702707886695862, 'critic_loss': 273.657465616862, 'actor_loss': 65.09278005187275, 'time_step': 0.017783371328610426, 'td_error': 45.83229452583529, 'init_value': -97.90089416503906, 'ave_value': -59.67935231369492} step=3762
2022-04-20 19:12.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.46 [info     ] CQL_20220420191129: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003711117638481988, 'time_algorithm_update': 0.01747447775121321, 'temp_loss': 0.5950622326253276, 'temp': 0.8571104510137212, 'alpha_loss': 8.67885797205027, 'alpha': 0.8368916663161495, 'critic_loss': 298.0880645395022, 'actor_loss': 68.47793856838294, 'time_step': 0.017945605411864164, 'td_error': 48.7703469140281, 'init_value': -103.36018371582031, 'ave_value': -62.51896916035328} step=4104
2022-04-20 19:12.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.52 [info     ] CQL_20220420191129: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003705108374880071, 'time_algorithm_update': 0.01769758107369406, 'temp_loss': 0.49928047979108947, 'temp': 0.8499201170185156, 'alpha_loss': 8.871941031071177, 'alpha': 0.8051539621157953, 'critic_loss': 325.08971516570153, 'actor_loss': 71.76318653842858, 'time_step': 0.01816521332277889, 'td_error': 49.94764725655823, 'init_value': -107.92634582519531, 'ave_value': -64.98510102699872} step=4446
2022-04-20 19:12.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:12.58 [info     ] CQL_20220420191129: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00036789381016067596, 'time_algorithm_update': 0.017922255727979872, 'temp_loss': 0.3683276787585421, 'temp': 0.8432524732330389, 'alpha_loss': 8.997927580660546, 'alpha': 0.775423191269936, 'critic_loss': 351.8696840520491, 'actor_loss': 74.8225702207688, 'time_step': 0.01838721587643986, 'td_error': 52.0927177377248, 'init_value': -112.2857437133789, 'ave_value': -67.77373906186573} step=4788
2022-04-20 19:12.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.05 [info     ] CQL_20220420191129: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003708921677885, 'time_algorithm_update': 0.018042616676865963, 'temp_loss': 0.2849748746235991, 'temp': 0.8376134304623855, 'alpha_loss': 9.320206095600685, 'alpha': 0.7474479999458581, 'critic_loss': 374.43482159173976, 'actor_loss': 77.4364292747096, 'time_step': 0.018514286704927857, 'td_error': 57.344099063606876, 'init_value': -113.8962631225586, 'ave_value': -68.42778411226901} step=5130
2022-04-20 19:13.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.11 [info     ] CQL_20220420191129: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003716959590800324, 'time_algorithm_update': 0.017640063637181333, 'temp_loss': 0.18535413850120633, 'temp': 0.8328993735257645, 'alpha_loss': 9.543994350042956, 'alpha': 0.7204994067811129, 'critic_loss': 398.09981211165933, 'actor_loss': 79.95922925179465, 'time_step': 0.018110041729888025, 'td_error': 65.4402880352798, 'init_value': -118.97996520996094, 'ave_value': -70.61738283592838} step=5472
2022-04-20 19:13.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.18 [info     ] CQL_20220420191129: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00037535380201730113, 'time_algorithm_update': 0.01748213293956734, 'temp_loss': 0.16266632839296644, 'temp': 0.829088691905228, 'alpha_loss': 9.276896480928388, 'alpha': 0.6948043780717236, 'critic_loss': 420.85731140493647, 'actor_loss': 82.10141002365023, 'time_step': 0.017957377154924715, 'td_error': 63.765357997097695, 'init_value': -124.94905090332031, 'ave_value': -74.55570467869978} step=5814
2022-04-20 19:13.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.24 [info     ] CQL_20220420191129: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003677641439159014, 'time_algorithm_update': 0.017686942864579763, 'temp_loss': 0.06428431598633005, 'temp': 0.8261860731052376, 'alpha_loss': 9.435243345143503, 'alpha': 0.6710936502057906, 'critic_loss': 435.53615958230534, 'actor_loss': 83.9783500314456, 'time_step': 0.01815515861176608, 'td_error': 63.948697194942035, 'init_value': -128.2864532470703, 'ave_value': -74.08903234812456} step=6156
2022-04-20 19:13.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.31 [info     ] CQL_20220420191129: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00037105669055068703, 'time_algorithm_update': 0.01765512025844284, 'temp_loss': 0.027348377801782904, 'temp': 0.824846739657441, 'alpha_loss': 9.49382392007705, 'alpha': 0.6480046549038581, 'critic_loss': 452.3489790353162, 'actor_loss': 85.9884173967685, 'time_step': 0.018124538555479887, 'td_error': 78.41065899697571, 'init_value': -129.69815063476562, 'ave_value': -76.23706413842133} step=6498
2022-04-20 19:13.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.37 [info     ] CQL_20220420191129: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00036633363244129204, 'time_algorithm_update': 0.01767747792584157, 'temp_loss': -0.014657937834442358, 'temp': 0.8247833617946558, 'alpha_loss': 9.919522146035357, 'alpha': 0.6254752130187743, 'critic_loss': 475.12231775473435, 'actor_loss': 88.16596375850209, 'time_step': 0.01814390413942393, 'td_error': 84.31371409124976, 'init_value': -131.93295288085938, 'ave_value': -77.23530981608944} step=6840
2022-04-20 19:13.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.44 [info     ] CQL_20220420191129: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003634712152313768, 'time_algorithm_update': 0.01772712615498325, 'temp_loss': -0.061032604061241384, 'temp': 0.8261356953291865, 'alpha_loss': 9.063457052610074, 'alpha': 0.6043419994805989, 'critic_loss': 493.2293826097633, 'actor_loss': 89.46256876270674, 'time_step': 0.018188283457393536, 'td_error': 81.487145513471, 'init_value': -138.38150024414062, 'ave_value': -78.66246967004226} step=7182
2022-04-20 19:13.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.50 [info     ] CQL_20220420191129: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003661140363815932, 'time_algorithm_update': 0.017672416062382925, 'temp_loss': -0.07906174348743512, 'temp': 0.8291299749187558, 'alpha_loss': 9.385117865445322, 'alpha': 0.5847346120410495, 'critic_loss': 508.0407638103641, 'actor_loss': 90.95693641796447, 'time_step': 0.01813723051060013, 'td_error': 81.12065704244638, 'init_value': -142.04443359375, 'ave_value': -82.08462496444986} step=7524
2022-04-20 19:13.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:13.56 [info     ] CQL_20220420191129: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003666787119636759, 'time_algorithm_update': 0.017499806588156183, 'temp_loss': -0.09678658005939415, 'temp': 0.833353369730955, 'alpha_loss': 9.368653197037546, 'alpha': 0.5652051805055629, 'critic_loss': 526.7344816330581, 'actor_loss': 92.49484839634589, 'time_step': 0.017963430337738572, 'td_error': 94.51149050344425, 'init_value': -144.21914672851562, 'ave_value': -83.77172035024577} step=7866
2022-04-20 19:13.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.03 [info     ] CQL_20220420191129: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00036323279665227524, 'time_algorithm_update': 0.01762339106777258, 'temp_loss': -0.09662568770697591, 'temp': 0.8377549888794882, 'alpha_loss': 9.515281123724597, 'alpha': 0.5462534208046762, 'critic_loss': 546.8237088744403, 'actor_loss': 93.90350709881699, 'time_step': 0.01808300074080975, 'td_error': 122.6742994558064, 'init_value': -147.90005493164062, 'ave_value': -82.96641877656518} step=8208
2022-04-20 19:14.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.09 [info     ] CQL_20220420191129: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000368197758992513, 'time_algorithm_update': 0.0176064521945708, 'temp_loss': -0.12747935029236893, 'temp': 0.8437816206126185, 'alpha_loss': 9.925522811231557, 'alpha': 0.5275645886945446, 'critic_loss': 568.3699818215175, 'actor_loss': 95.67814263684011, 'time_step': 0.018074701404013828, 'td_error': 105.38156652073044, 'init_value': -153.18675231933594, 'ave_value': -85.55276253915585} step=8550
2022-04-20 19:14.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.16 [info     ] CQL_20220420191129: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003687547661407649, 'time_algorithm_update': 0.01754441944479245, 'temp_loss': -0.08666971847632823, 'temp': 0.8495158513038479, 'alpha_loss': 9.706250677331846, 'alpha': 0.5095619271721756, 'critic_loss': 593.8323006434747, 'actor_loss': 96.96606190999348, 'time_step': 0.01801454114635088, 'td_error': 176.2745096452169, 'init_value': -154.26968383789062, 'ave_value': -87.13804993964396} step=8892
2022-04-20 19:14.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.22 [info     ] CQL_20220420191129: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003745765016790022, 'time_algorithm_update': 0.01755738467500921, 'temp_loss': -0.09531454623769424, 'temp': 0.8543826399142282, 'alpha_loss': 9.821517219320375, 'alpha': 0.49241664081986186, 'critic_loss': 618.2468454461349, 'actor_loss': 98.33414173683924, 'time_step': 0.018030662982784516, 'td_error': 216.92840556857948, 'init_value': -157.79193115234375, 'ave_value': -88.56780205134739} step=9234
2022-04-20 19:14.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.29 [info     ] CQL_20220420191129: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003735363831994129, 'time_algorithm_update': 0.017457080863372623, 'temp_loss': -0.10293240337605365, 'temp': 0.8598380773736719, 'alpha_loss': 10.289082023832533, 'alpha': 0.4756913038722256, 'critic_loss': 644.5560071621722, 'actor_loss': 100.03527642411795, 'time_step': 0.017928274054276317, 'td_error': 233.94747062009336, 'init_value': -161.79507446289062, 'ave_value': -90.17722097019288} step=9576
2022-04-20 19:14.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.35 [info     ] CQL_20220420191129: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003696171163815504, 'time_algorithm_update': 0.01754078251576563, 'temp_loss': -0.07378906319571431, 'temp': 0.8642024373450474, 'alpha_loss': 10.64943374248973, 'alpha': 0.4592327139182398, 'critic_loss': 676.8374250980846, 'actor_loss': 101.75668861433776, 'time_step': 0.01800864969777782, 'td_error': 307.3299010231294, 'init_value': -166.74908447265625, 'ave_value': -91.16639455027543} step=9918
2022-04-20 19:14.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.41 [info     ] CQL_20220420191129: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00036604223195572347, 'time_algorithm_update': 0.01752591899961059, 'temp_loss': 0.02778767385895838, 'temp': 0.8661467371976863, 'alpha_loss': 6.804245653208236, 'alpha': 0.44536939054204705, 'critic_loss': 703.094047591003, 'actor_loss': 101.96638511077703, 'time_step': 0.01799213189130638, 'td_error': 100.50646417360379, 'init_value': -160.1937255859375, 'ave_value': -89.65758592556726} step=10260
2022-04-20 19:14.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.48 [info     ] CQL_20220420191129: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003670147287915325, 'time_algorithm_update': 0.01741754799558405, 'temp_loss': 0.0075955653116542695, 'temp': 0.8647335471465574, 'alpha_loss': 5.848721181440075, 'alpha': 0.4355137613084581, 'critic_loss': 695.2035599201046, 'actor_loss': 101.22092312818383, 'time_step': 0.017884481022929586, 'td_error': 73.40230969180064, 'init_value': -158.21798706054688, 'ave_value': -88.829320155703} step=10602
2022-04-20 19:14.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:14.54 [info     ] CQL_20220420191129: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003652935139617028, 'time_algorithm_update': 0.01742610666486952, 'temp_loss': 0.0033330548206093715, 'temp': 0.8652041310455367, 'alpha_loss': 5.862919779548868, 'alpha': 0.4254698923282456, 'critic_loss': 680.2245269239994, 'actor_loss': 100.78305176405878, 'time_step': 0.017891080058806123, 'td_error': 68.57427357635905, 'init_value': -156.5607147216797, 'ave_value': -88.38875155159147} step=10944
2022-04-20 19:14.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.00 [info     ] CQL_20220420191129: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003487296968872784, 'time_algorithm_update': 0.01640984677431876, 'temp_loss': -0.027796338219740237, 'temp': 0.8657392359267898, 'alpha_loss': 5.713891765527558, 'alpha': 0.4149422636854718, 'critic_loss': 668.0365836160224, 'actor_loss': 100.32396298681783, 'time_step': 0.0168507224635074, 'td_error': 71.94765775195901, 'init_value': -156.83566284179688, 'ave_value': -90.69918328775277} step=11286
2022-04-20 19:15.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.06 [info     ] CQL_20220420191129: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035417567916780883, 'time_algorithm_update': 0.01666095312575848, 'temp_loss': 0.03848491006732335, 'temp': 0.8652372914448119, 'alpha_loss': 5.358378795852438, 'alpha': 0.40444581689890363, 'critic_loss': 658.4706169262267, 'actor_loss': 99.96363002933256, 'time_step': 0.01711167996389824, 'td_error': 71.33457395748788, 'init_value': -153.36294555664062, 'ave_value': -89.93864567659594} step=11628
2022-04-20 19:15.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.12 [info     ] CQL_20220420191129: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003702584763019406, 'time_algorithm_update': 0.017562323843526562, 'temp_loss': 0.04744053907605291, 'temp': 0.8632450295470612, 'alpha_loss': 5.262768309715896, 'alpha': 0.39384801772951383, 'critic_loss': 648.5061265376576, 'actor_loss': 99.728719097829, 'time_step': 0.018034205102083974, 'td_error': 63.48267221340202, 'init_value': -151.49794006347656, 'ave_value': -89.82907606718709} step=11970
2022-04-20 19:15.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.19 [info     ] CQL_20220420191129: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036922881477757504, 'time_algorithm_update': 0.017345714987370007, 'temp_loss': 0.06248505065511716, 'temp': 0.8590031628720245, 'alpha_loss': 5.056742466681185, 'alpha': 0.38317233803328016, 'critic_loss': 637.800967121682, 'actor_loss': 99.44606631541113, 'time_step': 0.017810360730042933, 'td_error': 65.23518398817626, 'init_value': -147.06761169433594, 'ave_value': -88.44185148415802} step=12312
2022-04-20 19:15.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.25 [info     ] CQL_20220420191129: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003369098518326966, 'time_algorithm_update': 0.015798413265518278, 'temp_loss': 0.06089972440501926, 'temp': 0.8545949244708345, 'alpha_loss': 4.809081642948396, 'alpha': 0.37269079502214464, 'critic_loss': 635.7698676572209, 'actor_loss': 99.41461215102882, 'time_step': 0.016226141773469267, 'td_error': 70.85440260502156, 'init_value': -150.30935668945312, 'ave_value': -89.6475753819137} step=12654
2022-04-20 19:15.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.31 [info     ] CQL_20220420191129: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00037682335279141255, 'time_algorithm_update': 0.01775573638447544, 'temp_loss': 0.04288436447362802, 'temp': 0.8520517561867921, 'alpha_loss': 4.713690198652926, 'alpha': 0.3623956677160765, 'critic_loss': 627.5077898348981, 'actor_loss': 99.15011128208093, 'time_step': 0.018235426897193954, 'td_error': 62.19679071600464, 'init_value': -146.66757202148438, 'ave_value': -89.24982614163879} step=12996
2022-04-20 19:15.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.37 [info     ] CQL_20220420191129: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003742258451138323, 'time_algorithm_update': 0.017624525298849184, 'temp_loss': 0.0446949224940866, 'temp': 0.8506066340800614, 'alpha_loss': 4.515299964369389, 'alpha': 0.3519771625423989, 'critic_loss': 615.0162606936449, 'actor_loss': 98.70206895348622, 'time_step': 0.018099372149908054, 'td_error': 61.91500932316841, 'init_value': -142.1680450439453, 'ave_value': -87.25639779281374} step=13338
2022-04-20 19:15.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.44 [info     ] CQL_20220420191129: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003790067650421321, 'time_algorithm_update': 0.017678294265479372, 'temp_loss': 0.05755720522842909, 'temp': 0.8465620502393846, 'alpha_loss': 4.383785427662364, 'alpha': 0.3420369243412687, 'critic_loss': 604.8136298642521, 'actor_loss': 98.33378161603247, 'time_step': 0.01815754767746953, 'td_error': 63.7356904152975, 'init_value': -138.5904998779297, 'ave_value': -87.94407027957273} step=13680
2022-04-20 19:15.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.50 [info     ] CQL_20220420191129: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037882969393367654, 'time_algorithm_update': 0.017788205927575542, 'temp_loss': 0.027224394443788026, 'temp': 0.8444995477534177, 'alpha_loss': 4.217283785691735, 'alpha': 0.3320665182600244, 'critic_loss': 594.1208834285625, 'actor_loss': 97.95117508737664, 'time_step': 0.018268650038200513, 'td_error': 52.59317584469214, 'init_value': -137.5647430419922, 'ave_value': -87.3438881913539} step=14022
2022-04-20 19:15.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:15.57 [info     ] CQL_20220420191129: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037614016505012733, 'time_algorithm_update': 0.01776650147131312, 'temp_loss': 0.04914671552438311, 'temp': 0.8422221015071312, 'alpha_loss': 4.054220191916527, 'alpha': 0.32246372225688913, 'critic_loss': 584.4518003407975, 'actor_loss': 97.47933388871756, 'time_step': 0.01824437944512618, 'td_error': 59.22524777706364, 'init_value': -138.7695770263672, 'ave_value': -86.01079822208766} step=14364
2022-04-20 19:15.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.03 [info     ] CQL_20220420191129: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00037574280075162477, 'time_algorithm_update': 0.01755909334149277, 'temp_loss': 0.07283431144529268, 'temp': 0.8393800891282266, 'alpha_loss': 3.8953476523795323, 'alpha': 0.3130514151171634, 'critic_loss': 576.4022567481326, 'actor_loss': 97.24444836622094, 'time_step': 0.0180344149383188, 'td_error': 60.028667086658345, 'init_value': -135.0299072265625, 'ave_value': -85.65153928227387} step=14706
2022-04-20 19:16.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.10 [info     ] CQL_20220420191129: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003724342201188294, 'time_algorithm_update': 0.017722346629315648, 'temp_loss': 0.09892303940475161, 'temp': 0.8334128989113702, 'alpha_loss': 3.808031570144564, 'alpha': 0.30389142419859677, 'critic_loss': 567.9099652920551, 'actor_loss': 96.94209932026111, 'time_step': 0.018193463833011382, 'td_error': 55.9624051319596, 'init_value': -137.54006958007812, 'ave_value': -87.3333028773668} step=15048
2022-04-20 19:16.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.16 [info     ] CQL_20220420191129: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037754279131080673, 'time_algorithm_update': 0.017649057315804108, 'temp_loss': 0.11877212535088871, 'temp': 0.8269843178534368, 'alpha_loss': 3.642275803967526, 'alpha': 0.29499356692645984, 'critic_loss': 560.1874976799502, 'actor_loss': 96.70274498030456, 'time_step': 0.018125547303093806, 'td_error': 52.19974496502265, 'init_value': -133.52969360351562, 'ave_value': -85.58903017495733} step=15390
2022-04-20 19:16.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.23 [info     ] CQL_20220420191129: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037359982206110366, 'time_algorithm_update': 0.01761087827515184, 'temp_loss': 0.09983243812855921, 'temp': 0.8207976737914727, 'alpha_loss': 3.5299982215925962, 'alpha': 0.28626926530871477, 'critic_loss': 551.1052359419259, 'actor_loss': 96.31190537569816, 'time_step': 0.01808668856035199, 'td_error': 54.15916000777864, 'init_value': -134.4757080078125, 'ave_value': -86.6679269196677} step=15732
2022-04-20 19:16.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.29 [info     ] CQL_20220420191129: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003767871020133035, 'time_algorithm_update': 0.017761540691754972, 'temp_loss': 0.10612011694877643, 'temp': 0.815627231932523, 'alpha_loss': 3.2909222053505522, 'alpha': 0.2779469848201986, 'critic_loss': 543.4010911015739, 'actor_loss': 95.99073207029822, 'time_step': 0.018238851201464557, 'td_error': 55.41662191706538, 'init_value': -132.01377868652344, 'ave_value': -86.14214957968072} step=16074
2022-04-20 19:16.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.36 [info     ] CQL_20220420191129: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003801235678600289, 'time_algorithm_update': 0.017772388737104093, 'temp_loss': 0.1171174123284151, 'temp': 0.8085343163264426, 'alpha_loss': 3.2390256632141203, 'alpha': 0.2698580548602935, 'critic_loss': 535.5201291982194, 'actor_loss': 95.65850981773688, 'time_step': 0.0182521615112037, 'td_error': 53.9530035248911, 'init_value': -133.55136108398438, 'ave_value': -86.32663517656225} step=16416
2022-04-20 19:16.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.42 [info     ] CQL_20220420191129: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00037825595565706664, 'time_algorithm_update': 0.01778509045204921, 'temp_loss': 0.14381368792131108, 'temp': 0.8010600836653459, 'alpha_loss': 3.0282018418200534, 'alpha': 0.2620300809193773, 'critic_loss': 527.0234341983906, 'actor_loss': 95.2278310541521, 'time_step': 0.01826591380158363, 'td_error': 52.889851741457846, 'init_value': -128.7416229248047, 'ave_value': -85.1327558269298} step=16758
2022-04-20 19:16.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:16.49 [info     ] CQL_20220420191129: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037483583416855125, 'time_algorithm_update': 0.01780728150529471, 'temp_loss': 0.12674427018310244, 'temp': 0.7943298126521864, 'alpha_loss': 2.8496119226628576, 'alpha': 0.2545535358769155, 'critic_loss': 519.8177169889037, 'actor_loss': 94.90262788917586, 'time_step': 0.018282693031935665, 'td_error': 49.94063768730687, 'init_value': -124.5820083618164, 'ave_value': -83.28358949753012} step=17100
2022-04-20 19:16.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191129/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:16.49 [info     ] FQE_20220420191649: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015621266122591698, 'time_algorithm_update': 0.002122138179628189, 'loss': 0.006839482245261723, 'time_step': 0.002352054509739418, 'init_value': -0.4875377118587494, 'ave_value': -0.43922884661156136, 'soft_opc': nan} step=177




2022-04-20 19:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.50 [info     ] FQE_20220420191649: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001567110503460728, 'time_algorithm_update': 0.0021018860703807766, 'loss': 0.0057561528384895986, 'time_step': 0.0023243993015612586, 'init_value': -0.5877999663352966, 'ave_value': -0.5075947137387308, 'soft_opc': nan} step=354




2022-04-20 19:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.50 [info     ] FQE_20220420191649: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.0001583220595020359, 'time_algorithm_update': 0.0021036708422299834, 'loss': 0.005527052007573472, 'time_step': 0.0023304931188033798, 'init_value': -0.6725394129753113, 'ave_value': -0.5697516090071595, 'soft_opc': nan} step=531




2022-04-20 19:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.51 [info     ] FQE_20220420191649: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015854431410967294, 'time_algorithm_update': 0.0020619327739133672, 'loss': 0.0052389931012639555, 'time_step': 0.0022923084302137126, 'init_value': -0.6875832676887512, 'ave_value': -0.576511520442662, 'soft_opc': nan} step=708




2022-04-20 19:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.51 [info     ] FQE_20220420191649: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.000158704606826696, 'time_algorithm_update': 0.0020804405212402344, 'loss': 0.005240395820389192, 'time_step': 0.002307451377480717, 'init_value': -0.7227621078491211, 'ave_value': -0.5958013368082477, 'soft_opc': nan} step=885




2022-04-20 19:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.51 [info     ] FQE_20220420191649: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001544534823315292, 'time_algorithm_update': 0.002050953396296097, 'loss': 0.005055238722986673, 'time_step': 0.002272464461245779, 'init_value': -0.7302655577659607, 'ave_value': -0.6022827243482745, 'soft_opc': nan} step=1062




2022-04-20 19:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.52 [info     ] FQE_20220420191649: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00015381231146343683, 'time_algorithm_update': 0.0020400939014671884, 'loss': 0.0048518411862521855, 'time_step': 0.0022600464901681673, 'init_value': -0.7481119632720947, 'ave_value': -0.5999995546029495, 'soft_opc': nan} step=1239




2022-04-20 19:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.52 [info     ] FQE_20220420191649: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015683362712967867, 'time_algorithm_update': 0.0020829378548315016, 'loss': 0.004683948782309469, 'time_step': 0.0023085680385093905, 'init_value': -0.7594168186187744, 'ave_value': -0.585481469834352, 'soft_opc': nan} step=1416




2022-04-20 19:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.53 [info     ] FQE_20220420191649: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00015856586607162563, 'time_algorithm_update': 0.0020719719471904516, 'loss': 0.0045028517409464565, 'time_step': 0.0022977260546495687, 'init_value': -0.7860080599784851, 'ave_value': -0.5944595752490891, 'soft_opc': nan} step=1593




2022-04-20 19:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.53 [info     ] FQE_20220420191649: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00015702490079200874, 'time_algorithm_update': 0.002109493912950074, 'loss': 0.004501481222825409, 'time_step': 0.002334563745617193, 'init_value': -0.8590767979621887, 'ave_value': -0.6517482547475411, 'soft_opc': nan} step=1770




2022-04-20 19:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.54 [info     ] FQE_20220420191649: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015708416868737862, 'time_algorithm_update': 0.002114132972760389, 'loss': 0.004615610257569847, 'time_step': 0.0023381238603322517, 'init_value': -0.8843827247619629, 'ave_value': -0.6318756515960078, 'soft_opc': nan} step=1947




2022-04-20 19:16.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.54 [info     ] FQE_20220420191649: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.0001586601559051686, 'time_algorithm_update': 0.00208648449957034, 'loss': 0.00459753236822826, 'time_step': 0.002313133013450493, 'init_value': -0.9439970850944519, 'ave_value': -0.6625666889521453, 'soft_opc': nan} step=2124




2022-04-20 19:16.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.55 [info     ] FQE_20220420191649: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015958419627388993, 'time_algorithm_update': 0.00208432795637745, 'loss': 0.004632932938356479, 'time_step': 0.002311806220792781, 'init_value': -1.0093460083007812, 'ave_value': -0.7007168651208864, 'soft_opc': nan} step=2301




2022-04-20 19:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.55 [info     ] FQE_20220420191649: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016062946642859507, 'time_algorithm_update': 0.002078216628166242, 'loss': 0.005510426164567891, 'time_step': 0.0023066526078908455, 'init_value': -1.1226483583450317, 'ave_value': -0.757504593243112, 'soft_opc': nan} step=2478




2022-04-20 19:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.56 [info     ] FQE_20220420191649: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015670566235558462, 'time_algorithm_update': 0.0020544421201371876, 'loss': 0.005860480511466325, 'time_step': 0.002277366185592393, 'init_value': -1.1906365156173706, 'ave_value': -0.7804900674087866, 'soft_opc': nan} step=2655




2022-04-20 19:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.56 [info     ] FQE_20220420191649: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00015505559026858227, 'time_algorithm_update': 0.0020989092056360624, 'loss': 0.006277545936021554, 'time_step': 0.0023211759362517102, 'init_value': -1.3009920120239258, 'ave_value': -0.8068146468707929, 'soft_opc': nan} step=2832




2022-04-20 19:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.56 [info     ] FQE_20220420191649: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015117354312185513, 'time_algorithm_update': 0.002062200826440154, 'loss': 0.006665412301071367, 'time_step': 0.0022786822022691283, 'init_value': -1.3372238874435425, 'ave_value': -0.7982909472519392, 'soft_opc': nan} step=3009




2022-04-20 19:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.57 [info     ] FQE_20220420191649: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015873828176724708, 'time_algorithm_update': 0.002173835948362189, 'loss': 0.007515556467065619, 'time_step': 0.0024072032863810913, 'init_value': -1.4275304079055786, 'ave_value': -0.8459773389338697, 'soft_opc': nan} step=3186




2022-04-20 19:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.57 [info     ] FQE_20220420191649: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00015812270385397357, 'time_algorithm_update': 0.0020840194939220017, 'loss': 0.007963579801821524, 'time_step': 0.0023142564094672767, 'init_value': -1.5312622785568237, 'ave_value': -0.8724850204129991, 'soft_opc': nan} step=3363




2022-04-20 19:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.58 [info     ] FQE_20220420191649: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00015287076012562896, 'time_algorithm_update': 0.0020652841039970095, 'loss': 0.008954898907413633, 'time_step': 0.0022876235724842483, 'init_value': -1.6291910409927368, 'ave_value': -0.9197317847377948, 'soft_opc': nan} step=3540




2022-04-20 19:16.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.58 [info     ] FQE_20220420191649: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.0001580984578967768, 'time_algorithm_update': 0.0020648422887769797, 'loss': 0.009988107473200114, 'time_step': 0.002293237858572922, 'init_value': -1.7250691652297974, 'ave_value': -0.938077073328913, 'soft_opc': nan} step=3717




2022-04-20 19:16.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.59 [info     ] FQE_20220420191649: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00015576545801539878, 'time_algorithm_update': 0.0020956912283170022, 'loss': 0.010688456646193051, 'time_step': 0.002321553095585882, 'init_value': -1.8284244537353516, 'ave_value': -0.9919376412538011, 'soft_opc': nan} step=3894




2022-04-20 19:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:16.59 [info     ] FQE_20220420191649: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015461916303904043, 'time_algorithm_update': 0.002072712795882575, 'loss': 0.01147938565821992, 'time_step': 0.0022979846781930008, 'init_value': -1.9062532186508179, 'ave_value': -0.9876926972516799, 'soft_opc': nan} step=4071




2022-04-20 19:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.00 [info     ] FQE_20220420191649: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001549801584017479, 'time_algorithm_update': 0.0020572465691862806, 'loss': 0.013021430020394215, 'time_step': 0.0022779669465318236, 'init_value': -2.013876438140869, 'ave_value': -0.9904097500039046, 'soft_opc': nan} step=4248




2022-04-20 19:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.00 [info     ] FQE_20220420191649: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.0001569858378609695, 'time_algorithm_update': 0.00209241263610495, 'loss': 0.014258239059814246, 'time_step': 0.002320281529830674, 'init_value': -2.18072509765625, 'ave_value': -1.0768488998099737, 'soft_opc': nan} step=4425




2022-04-20 19:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.00 [info     ] FQE_20220420191649: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001560173465707208, 'time_algorithm_update': 0.0020935845240361273, 'loss': 0.015398092605410365, 'time_step': 0.0023210789524229234, 'init_value': -2.2313506603240967, 'ave_value': -1.0851779757400377, 'soft_opc': nan} step=4602




2022-04-20 19:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.01 [info     ] FQE_20220420191649: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00015856451907400357, 'time_algorithm_update': 0.002112402080816064, 'loss': 0.01702944051847914, 'time_step': 0.0023415533162779727, 'init_value': -2.3557288646698, 'ave_value': -1.1399158268747431, 'soft_opc': nan} step=4779




2022-04-20 19:17.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.01 [info     ] FQE_20220420191649: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00015616147531627935, 'time_algorithm_update': 0.00208365580456405, 'loss': 0.01923521490120408, 'time_step': 0.0023122507300080554, 'init_value': -2.4275834560394287, 'ave_value': -1.1270567331735784, 'soft_opc': nan} step=4956




2022-04-20 19:17.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.02 [info     ] FQE_20220420191649: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00015458414110086733, 'time_algorithm_update': 0.0020574270668676344, 'loss': 0.019444515497165884, 'time_step': 0.002279920093083786, 'init_value': -2.602147340774536, 'ave_value': -1.2174182084568024, 'soft_opc': nan} step=5133




2022-04-20 19:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.02 [info     ] FQE_20220420191649: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015210027748582055, 'time_algorithm_update': 0.002011917405209299, 'loss': 0.021154394952085733, 'time_step': 0.0022284944178694384, 'init_value': -2.69281268119812, 'ave_value': -1.2576060757728982, 'soft_opc': nan} step=5310




2022-04-20 19:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.03 [info     ] FQE_20220420191649: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00015390525429935777, 'time_algorithm_update': 0.0021386793104268735, 'loss': 0.02271610411478596, 'time_step': 0.0023593565838485116, 'init_value': -2.7017383575439453, 'ave_value': -1.2148027990698569, 'soft_opc': nan} step=5487




2022-04-20 19:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.03 [info     ] FQE_20220420191649: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00015685787308687544, 'time_algorithm_update': 0.0021108476455602267, 'loss': 0.023743819350660857, 'time_step': 0.0023377561299814343, 'init_value': -2.8293607234954834, 'ave_value': -1.2609630089032242, 'soft_opc': nan} step=5664




2022-04-20 19:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.04 [info     ] FQE_20220420191649: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001567366433008916, 'time_algorithm_update': 0.0020692550529867915, 'loss': 0.024833265934034256, 'time_step': 0.00229398140126029, 'init_value': -2.952207565307617, 'ave_value': -1.2935480250565856, 'soft_opc': nan} step=5841




2022-04-20 19:17.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.04 [info     ] FQE_20220420191649: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015705722873493775, 'time_algorithm_update': 0.002069956838747876, 'loss': 0.025513305389296033, 'time_step': 0.002293167814696576, 'init_value': -2.9095191955566406, 'ave_value': -1.2144912763708482, 'soft_opc': nan} step=6018




2022-04-20 19:17.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.05 [info     ] FQE_20220420191649: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00015633254401427877, 'time_algorithm_update': 0.002113905330162264, 'loss': 0.026727273570252636, 'time_step': 0.002339070799660548, 'init_value': -3.024014472961426, 'ave_value': -1.2707466395577423, 'soft_opc': nan} step=6195




2022-04-20 19:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.05 [info     ] FQE_20220420191649: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015823854564946924, 'time_algorithm_update': 0.002078766203196035, 'loss': 0.02863004775112758, 'time_step': 0.0023015084239722647, 'init_value': -3.0317866802215576, 'ave_value': -1.247101701624613, 'soft_opc': nan} step=6372




2022-04-20 19:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.05 [info     ] FQE_20220420191649: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00015295157998295152, 'time_algorithm_update': 0.002015458661957649, 'loss': 0.030339165351295706, 'time_step': 0.002237771190492447, 'init_value': -3.166367292404175, 'ave_value': -1.353785056057568, 'soft_opc': nan} step=6549




2022-04-20 19:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.06 [info     ] FQE_20220420191649: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00015712727261128399, 'time_algorithm_update': 0.00211851206202965, 'loss': 0.031037270705778636, 'time_step': 0.002345848891694667, 'init_value': -3.0785157680511475, 'ave_value': -1.1727158499733397, 'soft_opc': nan} step=6726




2022-04-20 19:17.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.06 [info     ] FQE_20220420191649: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00015499497537559034, 'time_algorithm_update': 0.002132831993749586, 'loss': 0.033095913032848534, 'time_step': 0.0023560833796269476, 'init_value': -3.2240684032440186, 'ave_value': -1.26278287030983, 'soft_opc': nan} step=6903




2022-04-20 19:17.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.07 [info     ] FQE_20220420191649: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00015529939683817202, 'time_algorithm_update': 0.0021292247341177556, 'loss': 0.03453371594896761, 'time_step': 0.0023551835852154233, 'init_value': -3.2405710220336914, 'ave_value': -1.2347269628320012, 'soft_opc': nan} step=7080




2022-04-20 19:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.07 [info     ] FQE_20220420191649: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00015870191283145194, 'time_algorithm_update': 0.002078562806555107, 'loss': 0.036039122680233696, 'time_step': 0.0023068304115769554, 'init_value': -3.3279244899749756, 'ave_value': -1.2219062274193262, 'soft_opc': nan} step=7257




2022-04-20 19:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.08 [info     ] FQE_20220420191649: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00015590554576809122, 'time_algorithm_update': 0.0021118740577482234, 'loss': 0.03719444799836788, 'time_step': 0.0023377924989172296, 'init_value': -3.3086729049682617, 'ave_value': -1.1572891462946664, 'soft_opc': nan} step=7434




2022-04-20 19:17.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.08 [info     ] FQE_20220420191649: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00015550009948385639, 'time_algorithm_update': 0.0020644274135093905, 'loss': 0.03878724030587338, 'time_step': 0.0022897841566700047, 'init_value': -3.330232620239258, 'ave_value': -1.1222931248569363, 'soft_opc': nan} step=7611




2022-04-20 19:17.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.09 [info     ] FQE_20220420191649: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00015626654113079868, 'time_algorithm_update': 0.002060038895256775, 'loss': 0.03608249321890374, 'time_step': 0.0022842695484053617, 'init_value': -3.4185259342193604, 'ave_value': -1.1807491356116795, 'soft_opc': nan} step=7788




2022-04-20 19:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.09 [info     ] FQE_20220420191649: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016101874874136543, 'time_algorithm_update': 0.002138440891847772, 'loss': 0.04343283281787851, 'time_step': 0.0023695466208592648, 'init_value': -3.439223527908325, 'ave_value': -1.178113614056992, 'soft_opc': nan} step=7965




2022-04-20 19:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.10 [info     ] FQE_20220420191649: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016383801476430084, 'time_algorithm_update': 0.002157130483853615, 'loss': 0.04429105551923904, 'time_step': 0.002392433457455393, 'init_value': -3.4822099208831787, 'ave_value': -1.1526245099801413, 'soft_opc': nan} step=8142




2022-04-20 19:17.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.10 [info     ] FQE_20220420191649: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015745998102392853, 'time_algorithm_update': 0.002095120101325256, 'loss': 0.04729933949518886, 'time_step': 0.002323817398588536, 'init_value': -3.553152084350586, 'ave_value': -1.2093860873191788, 'soft_opc': nan} step=8319




2022-04-20 19:17.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.10 [info     ] FQE_20220420191649: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001591167880990411, 'time_algorithm_update': 0.0021162652699960828, 'loss': 0.04723084016997725, 'time_step': 0.00234525082475048, 'init_value': -3.486071825027466, 'ave_value': -1.091686252304199, 'soft_opc': nan} step=8496




2022-04-20 19:17.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.11 [info     ] FQE_20220420191649: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015778191345559674, 'time_algorithm_update': 0.002135449210129215, 'loss': 0.05010366511773213, 'time_step': 0.002364704164408021, 'init_value': -3.5671942234039307, 'ave_value': -1.1186708679284296, 'soft_opc': nan} step=8673




2022-04-20 19:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 19:17.11 [info     ] FQE_20220420191649: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00015985898378878663, 'time_algorithm_update': 0.002065740736190882, 'loss': 0.05051328297467681, 'time_step': 0.0022971320286982477, 'init_value': -3.641052007675171, 'ave_value': -1.1659094784531865, 'soft_opc': nan} step=8850




2022-04-20 19:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191649/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 19:17.12 [info     ] Directory is created at d3rlpy_logs/FQE_20220420191712
2022-04-20 19:17.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:17.12 [debug    ] Building models...
2022-04-20 19:17.12 [debug    ] Models have been built.
2022-04-20 19:17.12 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420191712/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:17.13 [info     ] FQE_20220420191712: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001575060293708049, 'time_algorithm_update': 0.002068858079507317, 'loss': 0.024344116507071845, 'time_step': 0.002295818463177748, 'init_value': -1.2657444477081299, 'ave_value': -1.2322800112507057, 'soft_opc': nan} step=355




2022-04-20 19:17.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.13 [info     ] FQE_20220420191712: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00015867260140432437, 'time_algorithm_update': 0.0020560842164805237, 'loss': 0.023391638391874206, 'time_step': 0.0022845288397560655, 'init_value': -2.341012954711914, 'ave_value': -2.315427299355908, 'soft_opc': nan} step=710




2022-04-20 19:17.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.14 [info     ] FQE_20220420191712: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00015806144391986685, 'time_algorithm_update': 0.002132373460581605, 'loss': 0.025401857558270576, 'time_step': 0.002362476939886389, 'init_value': -2.658071994781494, 'ave_value': -2.625675665810608, 'soft_opc': nan} step=1065




2022-04-20 19:17.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.15 [info     ] FQE_20220420191712: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00015933815862091493, 'time_algorithm_update': 0.002093567646725077, 'loss': 0.03149389851523537, 'time_step': 0.0023240881906428806, 'init_value': -3.391151189804077, 'ave_value': -3.380410429179438, 'soft_opc': nan} step=1420




2022-04-20 19:17.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.16 [info     ] FQE_20220420191712: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00015690494591081647, 'time_algorithm_update': 0.002090810050427074, 'loss': 0.03838741986317114, 'time_step': 0.0023193097450363805, 'init_value': -3.695772647857666, 'ave_value': -3.710066898556443, 'soft_opc': nan} step=1775




2022-04-20 19:17.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.17 [info     ] FQE_20220420191712: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00015938181272694761, 'time_algorithm_update': 0.002076856183334136, 'loss': 0.04995849411934614, 'time_step': 0.0023076138026277785, 'init_value': -4.188196659088135, 'ave_value': -4.211583895127899, 'soft_opc': nan} step=2130




2022-04-20 19:17.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.18 [info     ] FQE_20220420191712: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00015911921648912027, 'time_algorithm_update': 0.0020629741776157433, 'loss': 0.05965725589858394, 'time_step': 0.0022930098251557687, 'init_value': -4.702568054199219, 'ave_value': -4.733527211900537, 'soft_opc': nan} step=2485




2022-04-20 19:17.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.19 [info     ] FQE_20220420191712: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001589936269840724, 'time_algorithm_update': 0.0020988262874979367, 'loss': 0.07280633932268116, 'time_step': 0.0023280922795685245, 'init_value': -5.106015682220459, 'ave_value': -5.13642201376084, 'soft_opc': nan} step=2840




2022-04-20 19:17.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.20 [info     ] FQE_20220420191712: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00015709500917246644, 'time_algorithm_update': 0.0020826950879164145, 'loss': 0.08594934802607332, 'time_step': 0.002313832833733357, 'init_value': -5.480368137359619, 'ave_value': -5.539921874925494, 'soft_opc': nan} step=3195




2022-04-20 19:17.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.21 [info     ] FQE_20220420191712: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00015922264314033617, 'time_algorithm_update': 0.0020869328942097407, 'loss': 0.10394994733776425, 'time_step': 0.0023201566346934144, 'init_value': -5.855475425720215, 'ave_value': -5.974765359879288, 'soft_opc': nan} step=3550




2022-04-20 19:17.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.21 [info     ] FQE_20220420191712: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00015823538874236632, 'time_algorithm_update': 0.0020653180673088824, 'loss': 0.11935297827642988, 'time_step': 0.0022950877606029243, 'init_value': -6.178330898284912, 'ave_value': -6.404370778568923, 'soft_opc': nan} step=3905




2022-04-20 19:17.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.22 [info     ] FQE_20220420191712: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00015869073464836874, 'time_algorithm_update': 0.002068349005470813, 'loss': 0.13616987822026433, 'time_step': 0.002295447739077286, 'init_value': -6.221164226531982, 'ave_value': -6.572839828783423, 'soft_opc': nan} step=4260




2022-04-20 19:17.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.23 [info     ] FQE_20220420191712: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00015638445464658066, 'time_algorithm_update': 0.002088118271088936, 'loss': 0.1610438454791274, 'time_step': 0.0023137099306348346, 'init_value': -6.685925006866455, 'ave_value': -7.192094350405795, 'soft_opc': nan} step=4615




2022-04-20 19:17.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.24 [info     ] FQE_20220420191712: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00015750535776917364, 'time_algorithm_update': 0.0020850275603818223, 'loss': 0.17982785493450265, 'time_step': 0.002313324431298484, 'init_value': -6.6981520652771, 'ave_value': -7.313476947984123, 'soft_opc': nan} step=4970




2022-04-20 19:17.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.25 [info     ] FQE_20220420191712: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00015633609932912907, 'time_algorithm_update': 0.0020772960824026188, 'loss': 0.20609578882199778, 'time_step': 0.0023032678684718174, 'init_value': -7.2533721923828125, 'ave_value': -8.09853071588927, 'soft_opc': nan} step=5325




2022-04-20 19:17.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.26 [info     ] FQE_20220420191712: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00015797346410617022, 'time_algorithm_update': 0.0020665591871234733, 'loss': 0.22298679054832796, 'time_step': 0.0022946445035262847, 'init_value': -7.348822593688965, 'ave_value': -8.375814146900662, 'soft_opc': nan} step=5680




2022-04-20 19:17.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.27 [info     ] FQE_20220420191712: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00015498416524537852, 'time_algorithm_update': 0.0020374667476600323, 'loss': 0.24131038395137014, 'time_step': 0.002262537244340064, 'init_value': -7.704920768737793, 'ave_value': -8.873373251717581, 'soft_opc': nan} step=6035




2022-04-20 19:17.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.28 [info     ] FQE_20220420191712: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016247386663732394, 'time_algorithm_update': 0.002091354719350036, 'loss': 0.2572369474497899, 'time_step': 0.0023230411636997275, 'init_value': -7.794241905212402, 'ave_value': -9.125495724483097, 'soft_opc': nan} step=6390




2022-04-20 19:17.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.29 [info     ] FQE_20220420191712: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00015541063228123624, 'time_algorithm_update': 0.0021094906498009052, 'loss': 0.2736725238322372, 'time_step': 0.002338396663397131, 'init_value': -7.96519136428833, 'ave_value': -9.457734182957163, 'soft_opc': nan} step=6745




2022-04-20 19:17.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.29 [info     ] FQE_20220420191712: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016073979122537962, 'time_algorithm_update': 0.002100954593067438, 'loss': 0.285552175280074, 'time_step': 0.002330923752045967, 'init_value': -8.57301139831543, 'ave_value': -10.04867591124071, 'soft_opc': nan} step=7100




2022-04-20 19:17.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.30 [info     ] FQE_20220420191712: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00015920853950607944, 'time_algorithm_update': 0.002087337869993398, 'loss': 0.30307885559511855, 'time_step': 0.002318040417953276, 'init_value': -8.93606948852539, 'ave_value': -10.48705243955497, 'soft_opc': nan} step=7455




2022-04-20 19:17.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.31 [info     ] FQE_20220420191712: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00015948658258142605, 'time_algorithm_update': 0.002086944983039104, 'loss': 0.3177656658780827, 'time_step': 0.0023164084259892853, 'init_value': -9.276880264282227, 'ave_value': -10.9153913438114, 'soft_opc': nan} step=7810




2022-04-20 19:17.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.32 [info     ] FQE_20220420191712: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00015757923394861355, 'time_algorithm_update': 0.0021128862676486164, 'loss': 0.33246872652794274, 'time_step': 0.0023405189245519504, 'init_value': -9.640771865844727, 'ave_value': -11.315117608833019, 'soft_opc': nan} step=8165




2022-04-20 19:17.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.33 [info     ] FQE_20220420191712: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00016007289080552652, 'time_algorithm_update': 0.0021289335170262295, 'loss': 0.33683090815976474, 'time_step': 0.0023590101322657625, 'init_value': -10.14659309387207, 'ave_value': -11.849536765536277, 'soft_opc': nan} step=8520




2022-04-20 19:17.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.34 [info     ] FQE_20220420191712: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00015588008182149538, 'time_algorithm_update': 0.0020867609641921354, 'loss': 0.34977513063751475, 'time_step': 0.002314278777216522, 'init_value': -10.357810974121094, 'ave_value': -12.05824358473068, 'soft_opc': nan} step=8875




2022-04-20 19:17.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.35 [info     ] FQE_20220420191712: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00016021459874972493, 'time_algorithm_update': 0.0020797393691371862, 'loss': 0.35810757336062443, 'time_step': 0.0023106138471146705, 'init_value': -10.790253639221191, 'ave_value': -12.530511196483133, 'soft_opc': nan} step=9230




2022-04-20 19:17.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.36 [info     ] FQE_20220420191712: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.000156303862450828, 'time_algorithm_update': 0.002101226591728103, 'loss': 0.36647554569366114, 'time_step': 0.0023269324235513176, 'init_value': -11.004190444946289, 'ave_value': -12.60526208492449, 'soft_opc': nan} step=9585




2022-04-20 19:17.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.37 [info     ] FQE_20220420191712: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001608868719826282, 'time_algorithm_update': 0.00209452467904964, 'loss': 0.37968434779576854, 'time_step': 0.0023277564787528883, 'init_value': -11.43580150604248, 'ave_value': -13.026644275998613, 'soft_opc': nan} step=9940




2022-04-20 19:17.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.38 [info     ] FQE_20220420191712: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00015839522993060903, 'time_algorithm_update': 0.0020860833181461816, 'loss': 0.39098974999932334, 'time_step': 0.0023162136615162165, 'init_value': -11.982660293579102, 'ave_value': -13.552902009143486, 'soft_opc': nan} step=10295




2022-04-20 19:17.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.38 [info     ] FQE_20220420191712: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00016086336592553368, 'time_algorithm_update': 0.002048487730429206, 'loss': 0.4110277421720011, 'time_step': 0.0022818236283853022, 'init_value': -12.78478717803955, 'ave_value': -14.29416297518647, 'soft_opc': nan} step=10650




2022-04-20 19:17.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.39 [info     ] FQE_20220420191712: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00015361745592574, 'time_algorithm_update': 0.0020400221918670225, 'loss': 0.42506287669841675, 'time_step': 0.0022653761044354507, 'init_value': -13.302694320678711, 'ave_value': -14.68923440594298, 'soft_opc': nan} step=11005




2022-04-20 19:17.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.40 [info     ] FQE_20220420191712: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00015930860814913897, 'time_algorithm_update': 0.0020852700085707116, 'loss': 0.43459227371593595, 'time_step': 0.002315746226780851, 'init_value': -13.303181648254395, 'ave_value': -14.604072516001617, 'soft_opc': nan} step=11360




2022-04-20 19:17.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.41 [info     ] FQE_20220420191712: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016158063646773218, 'time_algorithm_update': 0.0020848207070793905, 'loss': 0.43891248211894235, 'time_step': 0.002318947751757125, 'init_value': -13.746767044067383, 'ave_value': -14.921877081472873, 'soft_opc': nan} step=11715




2022-04-20 19:17.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.42 [info     ] FQE_20220420191712: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00015962627572073062, 'time_algorithm_update': 0.0020986201057971365, 'loss': 0.45054648685938037, 'time_step': 0.0023268437721359896, 'init_value': -14.161566734313965, 'ave_value': -15.111589976780097, 'soft_opc': nan} step=12070




2022-04-20 19:17.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.43 [info     ] FQE_20220420191712: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00015741670635384573, 'time_algorithm_update': 0.0020523850346954777, 'loss': 0.45675731341708714, 'time_step': 0.002282818270401216, 'init_value': -14.352682113647461, 'ave_value': -15.246481626251883, 'soft_opc': nan} step=12425




2022-04-20 19:17.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.44 [info     ] FQE_20220420191712: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00015890833357690086, 'time_algorithm_update': 0.0020709098224908533, 'loss': 0.4691049453395773, 'time_step': 0.0022998366557376486, 'init_value': -14.845596313476562, 'ave_value': -15.414754951978281, 'soft_opc': nan} step=12780




2022-04-20 19:17.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.45 [info     ] FQE_20220420191712: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00015975992444535378, 'time_algorithm_update': 0.002047933659083407, 'loss': 0.48068667917289365, 'time_step': 0.002278298391422755, 'init_value': -15.396981239318848, 'ave_value': -15.83409497344931, 'soft_opc': nan} step=13135




2022-04-20 19:17.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.45 [info     ] FQE_20220420191712: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001593126377589266, 'time_algorithm_update': 0.00205348981937892, 'loss': 0.49358578236694906, 'time_step': 0.0022818572084668657, 'init_value': -15.654296875, 'ave_value': -16.082458428305692, 'soft_opc': nan} step=13490




2022-04-20 19:17.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.46 [info     ] FQE_20220420191712: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00015741603475221446, 'time_algorithm_update': 0.0020814103139957913, 'loss': 0.5163055865382644, 'time_step': 0.0023090214796469247, 'init_value': -16.169742584228516, 'ave_value': -16.361186278229063, 'soft_opc': nan} step=13845




2022-04-20 19:17.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.47 [info     ] FQE_20220420191712: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00015837441028003962, 'time_algorithm_update': 0.0020981862511433346, 'loss': 0.5203129525283273, 'time_step': 0.0023285785191495653, 'init_value': -16.62856674194336, 'ave_value': -16.720302165092054, 'soft_opc': nan} step=14200




2022-04-20 19:17.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.48 [info     ] FQE_20220420191712: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00015998088138204224, 'time_algorithm_update': 0.002095967279353612, 'loss': 0.5457301489431673, 'time_step': 0.002326456257994746, 'init_value': -17.64175033569336, 'ave_value': -17.597628604429875, 'soft_opc': nan} step=14555




2022-04-20 19:17.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.49 [info     ] FQE_20220420191712: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00015931465256382042, 'time_algorithm_update': 0.002057501967524139, 'loss': 0.5644171225667839, 'time_step': 0.0022869647388726894, 'init_value': -18.229948043823242, 'ave_value': -17.91847197648948, 'soft_opc': nan} step=14910




2022-04-20 19:17.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.50 [info     ] FQE_20220420191712: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016082038342113227, 'time_algorithm_update': 0.0020728212007334536, 'loss': 0.5898359400862959, 'time_step': 0.0023047225576051525, 'init_value': -18.556411743164062, 'ave_value': -18.06552615586088, 'soft_opc': nan} step=15265




2022-04-20 19:17.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.51 [info     ] FQE_20220420191712: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001605819648420307, 'time_algorithm_update': 0.002095076063988914, 'loss': 0.6048792007313648, 'time_step': 0.0023264287223278635, 'init_value': -18.867267608642578, 'ave_value': -18.115659919500043, 'soft_opc': nan} step=15620




2022-04-20 19:17.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.52 [info     ] FQE_20220420191712: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00015685793379662742, 'time_algorithm_update': 0.002019414767413072, 'loss': 0.6190097941426744, 'time_step': 0.0022477022359068963, 'init_value': -18.709728240966797, 'ave_value': -17.967772622320183, 'soft_opc': nan} step=15975




2022-04-20 19:17.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.53 [info     ] FQE_20220420191712: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00015839455832897777, 'time_algorithm_update': 0.002107367045442823, 'loss': 0.6205866479275512, 'time_step': 0.0023409957617101535, 'init_value': -19.585222244262695, 'ave_value': -18.561731380968325, 'soft_opc': nan} step=16330




2022-04-20 19:17.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.53 [info     ] FQE_20220420191712: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00015807689075738611, 'time_algorithm_update': 0.002102908282212808, 'loss': 0.6411921368518346, 'time_step': 0.002333418752106143, 'init_value': -19.788326263427734, 'ave_value': -18.825254314040894, 'soft_opc': nan} step=16685




2022-04-20 19:17.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.54 [info     ] FQE_20220420191712: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016046040494677047, 'time_algorithm_update': 0.0021005496172837807, 'loss': 0.6596657374217897, 'time_step': 0.0023318901867933678, 'init_value': -19.418224334716797, 'ave_value': -18.250675559718943, 'soft_opc': nan} step=17040




2022-04-20 19:17.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.55 [info     ] FQE_20220420191712: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.0001604698073696083, 'time_algorithm_update': 0.0020751691200363805, 'loss': 0.6526932081574915, 'time_step': 0.00230885022123095, 'init_value': -19.842138290405273, 'ave_value': -18.568845100292368, 'soft_opc': nan} step=17395




2022-04-20 19:17.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:17.56 [info     ] FQE_20220420191712: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00015927704287246918, 'time_algorithm_update': 0.002078416985525212, 'loss': 0.6559555432588702, 'time_step': 0.0023090033464028803, 'init_value': -20.05397605895996, 'ave_value': -18.627172691021187, 'soft_opc': nan} step=17750




2022-04-20 19:17.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420191712/model_17750.pt
search iteration:  26
using hyper params:  [0.007739333057912394, 0.009403525102164665, 6.702876428476512e-05, 5]
2022-04-20 19:17.56 [debug    ] RoundIterator is selected.
2022-04-20 19:17.56 [info     ] Directory is created at d3rlpy_logs/CQL_20220420191756
2022-04-20 19:17.56 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:17.56 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:17.56 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420191756/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.007739333057912394, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.03 [info     ] CQL_20220420191756: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00034937454245940986, 'time_algorithm_update': 0.017671871603580944, 'temp_loss': 4.540833194353427, 'temp': 0.9888113697021328, 'alpha_loss': -13.445727484965184, 'alpha': 1.0156482099092494, 'critic_loss': 34.15568419227823, 'actor_loss': 5.022269608289526, 'time_step': 0.018117316982202362, 'td_error': 11.60607259806327, 'init_value': -14.160118103027344, 'ave_value': -8.9258272563995} step=342
2022-04-20 19:18.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.09 [info     ] CQL_20220420191756: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00035179985894097225, 'time_algorithm_update': 0.01772186839789675, 'temp_loss': 3.429552113800718, 'temp': 0.9682783702312158, 'alpha_loss': -3.149044057158263, 'alpha': 1.033543526429182, 'critic_loss': 29.383819741812367, 'actor_loss': 12.482394212867781, 'time_step': 0.018170215930157933, 'td_error': 7.028860899348067, 'init_value': -21.741729736328125, 'ave_value': -13.3613756375733} step=684
2022-04-20 19:18.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.15 [info     ] CQL_20220420191756: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003500089310763175, 'time_algorithm_update': 0.01763480518296448, 'temp_loss': 2.456514346669292, 'temp': 0.9520134207798027, 'alpha_loss': 2.1552875232474324, 'alpha': 1.0343463110644915, 'critic_loss': 56.53731250205235, 'actor_loss': 19.062446254038672, 'time_step': 0.018080155751858538, 'td_error': 11.374778163961782, 'init_value': -31.38983154296875, 'ave_value': -18.38011401027374} step=1026
2022-04-20 19:18.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.22 [info     ] CQL_20220420191756: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.000350879646881282, 'time_algorithm_update': 0.017826895964773076, 'temp_loss': 1.7991849820167698, 'temp': 0.9383104731116378, 'alpha_loss': 5.8866875680566535, 'alpha': 1.0142856588837696, 'critic_loss': 99.16396070781506, 'actor_loss': 25.686926373264246, 'time_step': 0.01827447944217258, 'td_error': 18.798955991689976, 'init_value': -39.49876403808594, 'ave_value': -23.39354801185966} step=1368
2022-04-20 19:18.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.28 [info     ] CQL_20220420191756: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035281209220663147, 'time_algorithm_update': 0.0178808315455565, 'temp_loss': 1.3868964802451997, 'temp': 0.9263686440144366, 'alpha_loss': 8.444843500678303, 'alpha': 0.978899547056845, 'critic_loss': 150.68998658029656, 'actor_loss': 32.35368626020108, 'time_step': 0.01833481816520468, 'td_error': 29.98900982136948, 'init_value': -49.36763381958008, 'ave_value': -29.091630056291013} step=1710
2022-04-20 19:18.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.35 [info     ] CQL_20220420191756: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035136694099470884, 'time_algorithm_update': 0.01781879112734432, 'temp_loss': 1.016763914833989, 'temp': 0.9160921605009782, 'alpha_loss': 10.272077662205836, 'alpha': 0.9380669349815414, 'critic_loss': 203.326661717822, 'actor_loss': 38.646642143963376, 'time_step': 0.0182684952752632, 'td_error': 34.22022987973659, 'init_value': -61.2539176940918, 'ave_value': -35.07855704759155} step=2052
2022-04-20 19:18.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.42 [info     ] CQL_20220420191756: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00034997616594994974, 'time_algorithm_update': 0.018023607326529877, 'temp_loss': 0.7058153009445173, 'temp': 0.9074556681147793, 'alpha_loss': 11.749233961105347, 'alpha': 0.8980924086264003, 'critic_loss': 261.4015004118981, 'actor_loss': 45.01956748962402, 'time_step': 0.018471627207527385, 'td_error': 45.569837842937794, 'init_value': -70.37960052490234, 'ave_value': -40.36856774437266} step=2394
2022-04-20 19:18.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.48 [info     ] CQL_20220420191756: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003566714058145445, 'time_algorithm_update': 0.018067313913713422, 'temp_loss': 0.4231140867028146, 'temp': 0.9008088413386317, 'alpha_loss': 12.99084326677155, 'alpha': 0.8602474710048987, 'critic_loss': 323.32939375492566, 'actor_loss': 51.19353128176684, 'time_step': 0.018524650244684946, 'td_error': 81.80203309189118, 'init_value': -83.84845733642578, 'ave_value': -47.33961440172147} step=2736
2022-04-20 19:18.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:18.55 [info     ] CQL_20220420191756: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003644562604134543, 'time_algorithm_update': 0.019902758431016354, 'temp_loss': 0.21073881277476836, 'temp': 0.8966726408018704, 'alpha_loss': 14.571702003479004, 'alpha': 0.8239445165235396, 'critic_loss': 392.8402248627958, 'actor_loss': 57.455774597257204, 'time_step': 0.020372174636662355, 'td_error': 121.96995545773446, 'init_value': -92.64091491699219, 'ave_value': -52.3345822413306} step=3078
2022-04-20 19:18.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.05 [info     ] CQL_20220420191756: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0004240162888465569, 'time_algorithm_update': 0.027592895323770086, 'temp_loss': 0.0725303938862873, 'temp': 0.8944129727737248, 'alpha_loss': 15.603484527409424, 'alpha': 0.7897415863491638, 'critic_loss': 475.6662321927255, 'actor_loss': 64.08505288062737, 'time_step': 0.028146322010553372, 'td_error': 74.09694428458627, 'init_value': -97.77986145019531, 'ave_value': -54.47623554346865} step=3420
2022-04-20 19:19.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.15 [info     ] CQL_20220420191756: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00042852532793903906, 'time_algorithm_update': 0.027637630875347652, 'temp_loss': -0.05910332447560559, 'temp': 0.8945015175649297, 'alpha_loss': 17.613987897571764, 'alpha': 0.7578850517496031, 'critic_loss': 560.5422746089466, 'actor_loss': 70.79677517093413, 'time_step': 0.02819027050196776, 'td_error': 152.94660339310892, 'init_value': -121.3286361694336, 'ave_value': -66.11720003164231} step=3762
2022-04-20 19:19.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.25 [info     ] CQL_20220420191756: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00042565872794703434, 'time_algorithm_update': 0.02655877913647925, 'temp_loss': -0.16377298683145938, 'temp': 0.8964576362169276, 'alpha_loss': 20.96524409243935, 'alpha': 0.725030697577181, 'critic_loss': 665.375086555704, 'actor_loss': 78.37399874235454, 'time_step': 0.02710532305533426, 'td_error': 417.7128951612997, 'init_value': -132.91799926757812, 'ave_value': -73.01856646001741} step=4104
2022-04-20 19:19.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.34 [info     ] CQL_20220420191756: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0004121929581402338, 'time_algorithm_update': 0.026022867849695753, 'temp_loss': -0.25985770815253606, 'temp': 0.902372471770348, 'alpha_loss': 22.264052962699132, 'alpha': 0.6944620811451249, 'critic_loss': 781.9510597987481, 'actor_loss': 85.92389375563951, 'time_step': 0.02655466258177283, 'td_error': 860.3235789388747, 'init_value': -150.42257690429688, 'ave_value': -78.3237829530125} step=4446
2022-04-20 19:19.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.43 [info     ] CQL_20220420191756: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00041259799087256716, 'time_algorithm_update': 0.024126848282172667, 'temp_loss': -0.3463474024980389, 'temp': 0.9105654379777741, 'alpha_loss': 25.3987335252483, 'alpha': 0.6668570597617947, 'critic_loss': 896.4046645136605, 'actor_loss': 93.52280069094653, 'time_step': 0.02465851683365671, 'td_error': 1415.2985891375138, 'init_value': -172.47500610351562, 'ave_value': -88.02593991952571} step=4788
2022-04-20 19:19.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.51 [info     ] CQL_20220420191756: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003990067376030816, 'time_algorithm_update': 0.023380061339216624, 'temp_loss': -0.33572175621724965, 'temp': 0.9216437113215352, 'alpha_loss': 28.544901122823795, 'alpha': 0.6384777198409477, 'critic_loss': 1057.6101202713817, 'actor_loss': 103.24077543738292, 'time_step': 0.023891405055397434, 'td_error': 2302.521881174572, 'init_value': -203.19439697265625, 'ave_value': -100.55595918258334} step=5130
2022-04-20 19:19.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:19.59 [info     ] CQL_20220420191756: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003911946949205901, 'time_algorithm_update': 0.0219474368625217, 'temp_loss': -0.37403193585117134, 'temp': 0.9335371639296325, 'alpha_loss': 34.10654084166588, 'alpha': 0.6107902587854375, 'critic_loss': 1249.0011618095532, 'actor_loss': 113.96885975620202, 'time_step': 0.022447593728004145, 'td_error': 4710.901582573801, 'init_value': -232.1173553466797, 'ave_value': -111.14223247983166} step=5472
2022-04-20 19:19.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.07 [info     ] CQL_20220420191756: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003807000946580318, 'time_algorithm_update': 0.020871388284783614, 'temp_loss': -0.302624989856492, 'temp': 0.9460804418862214, 'alpha_loss': 29.355389985424733, 'alpha': 0.5859540544058147, 'critic_loss': 1443.8903755054139, 'actor_loss': 122.15567572075024, 'time_step': 0.021360836530986586, 'td_error': 1664.4398728017509, 'init_value': -232.7799530029297, 'ave_value': -111.37279664196275} step=5814
2022-04-20 19:20.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.14 [info     ] CQL_20220420191756: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.000375643111111825, 'time_algorithm_update': 0.02000991085119415, 'temp_loss': -0.413793013071674, 'temp': 0.9589952575533014, 'alpha_loss': 37.32720416331152, 'alpha': 0.5657168907380243, 'critic_loss': 1585.5796929967335, 'actor_loss': 130.83684120401304, 'time_step': 0.020486149871558473, 'td_error': 5893.492258186186, 'init_value': -262.0757751464844, 'ave_value': -125.96153790813007} step=6156
2022-04-20 19:20.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.21 [info     ] CQL_20220420191756: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003666006333646719, 'time_algorithm_update': 0.019242825563888104, 'temp_loss': -0.3968044480047466, 'temp': 0.9757860278525547, 'alpha_loss': 40.33831780347211, 'alpha': 0.5423334868330705, 'critic_loss': 1784.722744768823, 'actor_loss': 140.66266201532375, 'time_step': 0.019711703584905257, 'td_error': 9072.247507574875, 'init_value': -294.79193115234375, 'ave_value': -139.65903237987948} step=6498
2022-04-20 19:20.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.28 [info     ] CQL_20220420191756: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035899703265630713, 'time_algorithm_update': 0.018564374823319286, 'temp_loss': -0.27054054965945884, 'temp': 0.9893362347842657, 'alpha_loss': 29.32890181899975, 'alpha': 0.5239347860827084, 'critic_loss': 1987.1564855742872, 'actor_loss': 148.44515453584012, 'time_step': 0.019026983551114623, 'td_error': 7505.169987023006, 'init_value': -294.1952209472656, 'ave_value': -139.47599762766546} step=6840
2022-04-20 19:20.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.34 [info     ] CQL_20220420191756: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035984962307221706, 'time_algorithm_update': 0.01840193299522177, 'temp_loss': -0.3478978662615457, 'temp': 1.000297126714249, 'alpha_loss': 37.29182781392371, 'alpha': 0.5095636428099627, 'critic_loss': 2093.3765091031614, 'actor_loss': 153.80790101436148, 'time_step': 0.01885795872113858, 'td_error': 16314.522342241906, 'init_value': -324.7734069824219, 'ave_value': -151.76610608916428} step=7182
2022-04-20 19:20.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.41 [info     ] CQL_20220420191756: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.000351038592600683, 'time_algorithm_update': 0.018252897680851452, 'temp_loss': -0.5461042982400858, 'temp': 1.01970326551917, 'alpha_loss': 65.83130167834243, 'alpha': 0.4874621860639394, 'critic_loss': 2329.76407806218, 'actor_loss': 171.57955053675244, 'time_step': 0.018704289581343445, 'td_error': 1104.811032965384, 'init_value': -347.3945617675781, 'ave_value': -162.34742495880067} step=7524
2022-04-20 19:20.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.48 [info     ] CQL_20220420191756: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00034961923521164564, 'time_algorithm_update': 0.0181480186027393, 'temp_loss': -0.37663700843327924, 'temp': 1.0374430797950567, 'alpha_loss': 38.773613382849774, 'alpha': 0.46960527271206615, 'critic_loss': 2630.969808299639, 'actor_loss': 180.52937575669316, 'time_step': 0.01859843521787409, 'td_error': 2498.2462857211494, 'init_value': -377.32318115234375, 'ave_value': -180.14190865857645} step=7866
2022-04-20 19:20.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:20.54 [info     ] CQL_20220420191756: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003554514277051067, 'time_algorithm_update': 0.017835789953756054, 'temp_loss': -0.3026494604497277, 'temp': 1.0532721552932471, 'alpha_loss': 33.245833398305884, 'alpha': 0.4571917497972299, 'critic_loss': 2831.7228357861613, 'actor_loss': 187.11369702969378, 'time_step': 0.01829280211911564, 'td_error': 23138.450639975446, 'init_value': -408.7432861328125, 'ave_value': -189.66513913109094} step=8208
2022-04-20 19:20.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.01 [info     ] CQL_20220420191756: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00035544585066232066, 'time_algorithm_update': 0.017899741206252782, 'temp_loss': -0.5950934744347431, 'temp': 1.070266436415109, 'alpha_loss': 64.9282687086808, 'alpha': 0.4429731820410455, 'critic_loss': 3014.270573130825, 'actor_loss': 201.3768046864292, 'time_step': 0.01835482998898155, 'td_error': 47590.19120130194, 'init_value': -481.91796875, 'ave_value': -216.09729880182593} step=8550
2022-04-20 19:21.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.07 [info     ] CQL_20220420191756: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003507374322902389, 'time_algorithm_update': 0.017789869977716814, 'temp_loss': -0.32662144967229567, 'temp': 1.0900051980687862, 'alpha_loss': 46.50851139269377, 'alpha': 0.4258657142258527, 'critic_loss': 3352.9901165878564, 'actor_loss': 212.49825349328114, 'time_step': 0.018241448709142138, 'td_error': 33414.357312798755, 'init_value': -462.74200439453125, 'ave_value': -222.14536262436158} step=8892
2022-04-20 19:21.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.14 [info     ] CQL_20220420191756: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.000361521341647321, 'time_algorithm_update': 0.01773916280757614, 'temp_loss': -0.2007960164973959, 'temp': 1.1009570036018104, 'alpha_loss': 20.134216930608304, 'alpha': 0.41656625941831465, 'critic_loss': 3497.374710886102, 'actor_loss': 212.73053246213678, 'time_step': 0.018200303378858064, 'td_error': 9543.909242370959, 'init_value': -472.58251953125, 'ave_value': -229.3538034455854} step=9234
2022-04-20 19:21.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.20 [info     ] CQL_20220420191756: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003617088697110003, 'time_algorithm_update': 0.017600587934081317, 'temp_loss': -0.513166993377153, 'temp': 1.116499003959678, 'alpha_loss': 27.17869433551504, 'alpha': 0.40912548273976085, 'critic_loss': 3554.121851157027, 'actor_loss': 217.035362199036, 'time_step': 0.01806287249626472, 'td_error': 11574.620352907348, 'init_value': -461.96661376953125, 'ave_value': -226.22532648021908} step=9576
2022-04-20 19:21.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.27 [info     ] CQL_20220420191756: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000352777235689219, 'time_algorithm_update': 0.017823331537302475, 'temp_loss': -0.346159502831206, 'temp': 1.1333871508202358, 'alpha_loss': 42.11042507350096, 'alpha': 0.3995324982362881, 'critic_loss': 3652.9699835526317, 'actor_loss': 225.5161711932623, 'time_step': 0.01827436093001338, 'td_error': 21166.14901199925, 'init_value': -486.4369201660156, 'ave_value': -233.9532075885039} step=9918
2022-04-20 19:21.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.33 [info     ] CQL_20220420191756: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00035311743529916505, 'time_algorithm_update': 0.01757882213034825, 'temp_loss': -0.5598077985910122, 'temp': 1.1525688621035792, 'alpha_loss': 56.816054296772386, 'alpha': 0.38336283012091765, 'critic_loss': 3990.598402235243, 'actor_loss': 243.92626301726403, 'time_step': 0.01802758166664525, 'td_error': 20498.306489270457, 'init_value': -492.4585876464844, 'ave_value': -251.3563691003822} step=10260
2022-04-20 19:21.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.39 [info     ] CQL_20220420191756: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003488767913907592, 'time_algorithm_update': 0.01762271066855269, 'temp_loss': -0.45522188988911827, 'temp': 1.1747829433073078, 'alpha_loss': 32.05583233652059, 'alpha': 0.37180954514191167, 'critic_loss': 4151.138838918586, 'actor_loss': 243.25857071012084, 'time_step': 0.018069455498143247, 'td_error': 10705.607317718921, 'init_value': -513.0333251953125, 'ave_value': -247.07030609244296} step=10602
2022-04-20 19:21.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.46 [info     ] CQL_20220420191756: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035799874199761287, 'time_algorithm_update': 0.017821542700828863, 'temp_loss': -0.31191018617532235, 'temp': 1.1871334359659786, 'alpha_loss': 45.09074086194847, 'alpha': 0.3612676624491898, 'critic_loss': 4320.861702188414, 'actor_loss': 253.2977608128598, 'time_step': 0.018281452837046127, 'td_error': 18072.698996032137, 'init_value': -541.1004638671875, 'ave_value': -262.78704936764933} step=10944
2022-04-20 19:21.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.52 [info     ] CQL_20220420191756: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003502605951320358, 'time_algorithm_update': 0.018005441503915174, 'temp_loss': -0.24280237833484572, 'temp': 1.2000575396749709, 'alpha_loss': 22.077436511279547, 'alpha': 0.35159306968861853, 'critic_loss': 4346.370032237985, 'actor_loss': 247.14180296066908, 'time_step': 0.018452360616092792, 'td_error': 2583.123075913076, 'init_value': -510.396484375, 'ave_value': -249.10047141731053} step=11286
2022-04-20 19:21.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:21.59 [info     ] CQL_20220420191756: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00035384245086134527, 'time_algorithm_update': 0.01782910656510738, 'temp_loss': -0.18053889210875096, 'temp': 1.2101221523786847, 'alpha_loss': 8.680472332134581, 'alpha': 0.3478960526442667, 'critic_loss': 4070.973628529331, 'actor_loss': 231.29352828354862, 'time_step': 0.01828053192785609, 'td_error': 1207.6194161005708, 'init_value': -484.08648681640625, 'ave_value': -226.62588470792315} step=11628
2022-04-20 19:21.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.05 [info     ] CQL_20220420191756: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.000356695108246385, 'time_algorithm_update': 0.01791663128033019, 'temp_loss': 0.06521096367018613, 'temp': 1.214017068782048, 'alpha_loss': 7.119217169563672, 'alpha': 0.34519387650908084, 'critic_loss': 3831.4049157929003, 'actor_loss': 220.57944863302666, 'time_step': 0.018372863356830083, 'td_error': 1052.4305179068492, 'init_value': -484.91998291015625, 'ave_value': -222.48606558012668} step=11970
2022-04-20 19:22.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.12 [info     ] CQL_20220420191756: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003546587904991462, 'time_algorithm_update': 0.018028587625737776, 'temp_loss': 0.1725165795055572, 'temp': 1.207299328338333, 'alpha_loss': 5.143246873777512, 'alpha': 0.3429496956673282, 'critic_loss': 3599.1343016150404, 'actor_loss': 210.69944879186085, 'time_step': 0.018482660689549138, 'td_error': 898.3069997638182, 'init_value': -482.4300842285156, 'ave_value': -216.08356798990755} step=12312
2022-04-20 19:22.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.19 [info     ] CQL_20220420191756: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003535127082066229, 'time_algorithm_update': 0.017897255936561274, 'temp_loss': 0.2867407207513413, 'temp': 1.1923168339924506, 'alpha_loss': 5.2398285893668906, 'alpha': 0.34055014277062223, 'critic_loss': 3378.8824473598547, 'actor_loss': 201.7914631715295, 'time_step': 0.018348491679855257, 'td_error': 825.6586243548417, 'init_value': -461.0279235839844, 'ave_value': -205.28006257468047} step=12654
2022-04-20 19:22.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.25 [info     ] CQL_20220420191756: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035578256462052553, 'time_algorithm_update': 0.017789485858894928, 'temp_loss': 0.29981719036940596, 'temp': 1.1770076730795074, 'alpha_loss': 5.37673804041935, 'alpha': 0.3376853986267458, 'critic_loss': 3179.627679835983, 'actor_loss': 194.27621428729498, 'time_step': 0.018242150022272478, 'td_error': 746.1721295498401, 'init_value': -437.068115234375, 'ave_value': -192.99690980590543} step=12996
2022-04-20 19:22.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.32 [info     ] CQL_20220420191756: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035798898217273735, 'time_algorithm_update': 0.01796038248385602, 'temp_loss': 0.3609148364688395, 'temp': 1.159745743051607, 'alpha_loss': 5.4448257099118145, 'alpha': 0.3342831625401625, 'critic_loss': 2994.4822219937864, 'actor_loss': 187.0780285840843, 'time_step': 0.018417128345422577, 'td_error': 674.9296499070899, 'init_value': -418.97509765625, 'ave_value': -184.44134190965343} step=13338
2022-04-20 19:22.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.38 [info     ] CQL_20220420191756: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003619019748174656, 'time_algorithm_update': 0.0178266882199293, 'temp_loss': 0.30060994502004473, 'temp': 1.1401312163001613, 'alpha_loss': 5.473651067206734, 'alpha': 0.3304141847885143, 'critic_loss': 2831.903662609078, 'actor_loss': 181.10412008720533, 'time_step': 0.01829283000432957, 'td_error': 583.5089695565289, 'init_value': -404.9625244140625, 'ave_value': -179.27181856789568} step=13680
2022-04-20 19:22.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.45 [info     ] CQL_20220420191756: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003523519861767864, 'time_algorithm_update': 0.01781257551315932, 'temp_loss': 0.3145522097811887, 'temp': 1.1247820303454037, 'alpha_loss': 5.535793283878014, 'alpha': 0.32587257193194497, 'critic_loss': 2679.2671044779104, 'actor_loss': 175.25921242697197, 'time_step': 0.01826267563111601, 'td_error': 624.7865142319666, 'init_value': -393.78472900390625, 'ave_value': -175.09911596906414} step=14022
2022-04-20 19:22.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.51 [info     ] CQL_20220420191756: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003515468006245574, 'time_algorithm_update': 0.017805188022858916, 'temp_loss': 0.3238790963574895, 'temp': 1.1075995062526904, 'alpha_loss': 5.462174405131424, 'alpha': 0.3207607130732453, 'critic_loss': 2546.9344343218886, 'actor_loss': 170.33609579878245, 'time_step': 0.018251136032461424, 'td_error': 491.0859231224326, 'init_value': -363.1343688964844, 'ave_value': -162.89134765610837} step=14364
2022-04-20 19:22.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:22.57 [info     ] CQL_20220420191756: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003498750820494535, 'time_algorithm_update': 0.017748620077880504, 'temp_loss': 0.26255945658736063, 'temp': 1.0914719589272437, 'alpha_loss': 5.349680170329691, 'alpha': 0.3152621812813463, 'critic_loss': 2420.0794470714545, 'actor_loss': 165.53688419632047, 'time_step': 0.018197035928915816, 'td_error': 499.3478027015165, 'init_value': -352.0610656738281, 'ave_value': -159.80492841402756} step=14706
2022-04-20 19:22.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.04 [info     ] CQL_20220420191756: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00035485398699665625, 'time_algorithm_update': 0.01771330206017745, 'temp_loss': 0.2720738862864455, 'temp': 1.077294287277244, 'alpha_loss': 5.423129861117803, 'alpha': 0.30911354955873993, 'critic_loss': 2309.183397081163, 'actor_loss': 161.58630121242234, 'time_step': 0.018168485652633577, 'td_error': 492.470280825426, 'init_value': -337.4626159667969, 'ave_value': -157.88173958035537} step=15048
2022-04-20 19:23.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.10 [info     ] CQL_20220420191756: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003533203002305059, 'time_algorithm_update': 0.017772925527472245, 'temp_loss': 0.22653483204020744, 'temp': 1.0622490735081902, 'alpha_loss': 5.249814784317686, 'alpha': 0.3024847284395095, 'critic_loss': 2203.4700842070997, 'actor_loss': 157.55496313976266, 'time_step': 0.018224781019645826, 'td_error': 395.8568806162875, 'init_value': -312.67706298828125, 'ave_value': -149.9289711576049} step=15390
2022-04-20 19:23.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.17 [info     ] CQL_20220420191756: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035400767075388054, 'time_algorithm_update': 0.018091992328041477, 'temp_loss': 0.2632274001793206, 'temp': 1.0490114932868913, 'alpha_loss': 5.390418945697316, 'alpha': 0.2953820328789148, 'critic_loss': 2113.1000723141674, 'actor_loss': 154.19972817939623, 'time_step': 0.01855044127904881, 'td_error': 430.6854508257907, 'init_value': -301.34686279296875, 'ave_value': -148.0584734740142} step=15732
2022-04-20 19:23.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.23 [info     ] CQL_20220420191756: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035385081642552425, 'time_algorithm_update': 0.017885892711884795, 'temp_loss': 0.27973081171512604, 'temp': 1.0338088437827706, 'alpha_loss': 5.075373329614338, 'alpha': 0.2880585342645645, 'critic_loss': 2027.437632421304, 'actor_loss': 150.9842285022401, 'time_step': 0.018339295136301142, 'td_error': 288.81398798678026, 'init_value': -287.18115234375, 'ave_value': -143.8381231712879} step=16074
2022-04-20 19:23.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.30 [info     ] CQL_20220420191756: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00036399197160151965, 'time_algorithm_update': 0.018066839865076612, 'temp_loss': 0.27316609863736474, 'temp': 1.0182623458884612, 'alpha_loss': 4.994799192188776, 'alpha': 0.28079938226275974, 'critic_loss': 1949.2722021627146, 'actor_loss': 148.1617991352639, 'time_step': 0.01852953573416548, 'td_error': 442.9997308619691, 'init_value': -271.04559326171875, 'ave_value': -139.19244450971317} step=16416
2022-04-20 19:23.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.37 [info     ] CQL_20220420191756: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00034626324971516925, 'time_algorithm_update': 0.018019205645511027, 'temp_loss': 0.21546453738596008, 'temp': 1.0045700111584357, 'alpha_loss': 4.8318486415851885, 'alpha': 0.27335935848498205, 'critic_loss': 1876.117902432269, 'actor_loss': 145.35344624658774, 'time_step': 0.018462778532017044, 'td_error': 346.48477140606525, 'init_value': -259.6955871582031, 'ave_value': -139.0192846090407} step=16758
2022-04-20 19:23.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:23.43 [info     ] CQL_20220420191756: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003459453582763672, 'time_algorithm_update': 0.01782357971570645, 'temp_loss': 0.1611109288866844, 'temp': 0.9949634700839283, 'alpha_loss': 4.570525283004805, 'alpha': 0.2659962771580233, 'critic_loss': 1806.3899646781342, 'actor_loss': 142.7330968533343, 'time_step': 0.0182724005994741, 'td_error': 368.2837300168511, 'init_value': -250.5225372314453, 'ave_value': -136.47378989458824} step=17100
2022-04-20 19:23.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420191756/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:23.44 [info     ] FQE_20220420192343: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014626836202230798, 'time_algorithm_update': 0.002059906362050987, 'loss': 0.007649874849054199, 'time_step': 0.002278092395828431, 'init_value': -0.24101915955543518, 'ave_value': -0.20958903670931855, 'soft_opc': nan} step=166




2022-04-20 19:23.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.44 [info     ] FQE_20220420192343: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014919258025755365, 'time_algorithm_update': 0.002075139298496476, 'loss': 0.005952330307574965, 'time_step': 0.0022909425827394047, 'init_value': -0.3741454482078552, 'ave_value': -0.3086452044848655, 'soft_opc': nan} step=332




2022-04-20 19:23.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.45 [info     ] FQE_20220420192343: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014996097748538098, 'time_algorithm_update': 0.0020797913333019578, 'loss': 0.005692352306557797, 'time_step': 0.0022973382329366295, 'init_value': -0.4248853325843811, 'ave_value': -0.33980497788251923, 'soft_opc': nan} step=498




2022-04-20 19:23.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.45 [info     ] FQE_20220420192343: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001426992646182876, 'time_algorithm_update': 0.001984461244330349, 'loss': 0.00563488987201533, 'time_step': 0.002193373369883342, 'init_value': -0.5216037631034851, 'ave_value': -0.40825753335983633, 'soft_opc': nan} step=664




2022-04-20 19:23.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.45 [info     ] FQE_20220420192343: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015183672847518003, 'time_algorithm_update': 0.002060044242675046, 'loss': 0.005393934803622135, 'time_step': 0.0022762367524296404, 'init_value': -0.6039139032363892, 'ave_value': -0.4648057730650311, 'soft_opc': nan} step=830




2022-04-20 19:23.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.46 [info     ] FQE_20220420192343: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014940370996314358, 'time_algorithm_update': 0.0020036395773830183, 'loss': 0.0051404130827433555, 'time_step': 0.002218777874866164, 'init_value': -0.6575120687484741, 'ave_value': -0.5154190524237918, 'soft_opc': nan} step=996




2022-04-20 19:23.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.46 [info     ] FQE_20220420192343: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001457958336336067, 'time_algorithm_update': 0.00205841121903385, 'loss': 0.005169151036674718, 'time_step': 0.002273029591663774, 'init_value': -0.7247204780578613, 'ave_value': -0.5673648659408361, 'soft_opc': nan} step=1162




2022-04-20 19:23.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.47 [info     ] FQE_20220420192343: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014884356992790498, 'time_algorithm_update': 0.0020536112498088055, 'loss': 0.0048816642213433264, 'time_step': 0.0022702863417476057, 'init_value': -0.8011077642440796, 'ave_value': -0.6088728040591016, 'soft_opc': nan} step=1328




2022-04-20 19:23.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.47 [info     ] FQE_20220420192343: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014644933034138507, 'time_algorithm_update': 0.002051589000655944, 'loss': 0.004594708461853036, 'time_step': 0.002267306109508836, 'init_value': -0.849635124206543, 'ave_value': -0.6415870132679875, 'soft_opc': nan} step=1494




2022-04-20 19:23.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.47 [info     ] FQE_20220420192343: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001501232744699501, 'time_algorithm_update': 0.0020661655678806535, 'loss': 0.004488946938696486, 'time_step': 0.002281111406992717, 'init_value': -0.8910542726516724, 'ave_value': -0.6534135341107309, 'soft_opc': nan} step=1660




2022-04-20 19:23.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.48 [info     ] FQE_20220420192343: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00014782239155596997, 'time_algorithm_update': 0.0020896871405911735, 'loss': 0.00419984499121601, 'time_step': 0.0023104469460177132, 'init_value': -0.9726033210754395, 'ave_value': -0.7109653208102729, 'soft_opc': nan} step=1826




2022-04-20 19:23.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.48 [info     ] FQE_20220420192343: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015245862753994493, 'time_algorithm_update': 0.0020450727049126684, 'loss': 0.004201420335273023, 'time_step': 0.002267135194985263, 'init_value': -1.0595215559005737, 'ave_value': -0.7752220200861359, 'soft_opc': nan} step=1992




2022-04-20 19:23.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.49 [info     ] FQE_20220420192343: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001481283141906003, 'time_algorithm_update': 0.002140333853572248, 'loss': 0.004265221329518111, 'time_step': 0.0023593945675585643, 'init_value': -1.1708259582519531, 'ave_value': -0.850732458899687, 'soft_opc': nan} step=2158




2022-04-20 19:23.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.49 [info     ] FQE_20220420192343: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014691898621708513, 'time_algorithm_update': 0.0020514855901879, 'loss': 0.004269539897543299, 'time_step': 0.0022623481520687243, 'init_value': -1.3053075075149536, 'ave_value': -0.9594330873978031, 'soft_opc': nan} step=2324




2022-04-20 19:23.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.50 [info     ] FQE_20220420192343: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014787266053349138, 'time_algorithm_update': 0.0020392185234161743, 'loss': 0.004318235397860633, 'time_step': 0.0022538670574326113, 'init_value': -1.3608293533325195, 'ave_value': -0.9812577191773836, 'soft_opc': nan} step=2490




2022-04-20 19:23.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.50 [info     ] FQE_20220420192343: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014938791114163687, 'time_algorithm_update': 0.0020477613770818136, 'loss': 0.004677410421230976, 'time_step': 0.0022653326930769956, 'init_value': -1.4820529222488403, 'ave_value': -1.0589190690232826, 'soft_opc': nan} step=2656




2022-04-20 19:23.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.50 [info     ] FQE_20220420192343: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015039041817906392, 'time_algorithm_update': 0.0020236380128975376, 'loss': 0.004965033812072487, 'time_step': 0.0022451676518084056, 'init_value': -1.6948127746582031, 'ave_value': -1.1799245375367973, 'soft_opc': nan} step=2822




2022-04-20 19:23.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.51 [info     ] FQE_20220420192343: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015243851994893638, 'time_algorithm_update': 0.002130818654255695, 'loss': 0.005517210953739887, 'time_step': 0.00235473535147058, 'init_value': -1.7408182621002197, 'ave_value': -1.2051020035738345, 'soft_opc': nan} step=2988




2022-04-20 19:23.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.51 [info     ] FQE_20220420192343: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014741736722279745, 'time_algorithm_update': 0.002072934644768037, 'loss': 0.006000778187593692, 'time_step': 0.00228949914495629, 'init_value': -1.8927326202392578, 'ave_value': -1.303302677029425, 'soft_opc': nan} step=3154




2022-04-20 19:23.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.52 [info     ] FQE_20220420192343: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015182954719267697, 'time_algorithm_update': 0.002121869340000382, 'loss': 0.006064482069018869, 'time_step': 0.002343500953122794, 'init_value': -2.0230746269226074, 'ave_value': -1.3838781448097917, 'soft_opc': nan} step=3320




2022-04-20 19:23.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.52 [info     ] FQE_20220420192343: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014593658677066666, 'time_algorithm_update': 0.0020668018295104244, 'loss': 0.006692321588851915, 'time_step': 0.002285243516944977, 'init_value': -2.072844982147217, 'ave_value': -1.3956438316082633, 'soft_opc': nan} step=3486




2022-04-20 19:23.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.53 [info     ] FQE_20220420192343: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014417286378791533, 'time_algorithm_update': 0.0020594496324837924, 'loss': 0.007215744857143341, 'time_step': 0.0022777433854987822, 'init_value': -2.264840841293335, 'ave_value': -1.5454808705024892, 'soft_opc': nan} step=3652




2022-04-20 19:23.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.53 [info     ] FQE_20220420192343: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014484646808670228, 'time_algorithm_update': 0.002032789839319436, 'loss': 0.008426490544161972, 'time_step': 0.002243011830800987, 'init_value': -2.428607940673828, 'ave_value': -1.6273921715515154, 'soft_opc': nan} step=3818




2022-04-20 19:23.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.53 [info     ] FQE_20220420192343: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014929024569959525, 'time_algorithm_update': 0.0020773956574589372, 'loss': 0.008700817922292927, 'time_step': 0.0022974330258656696, 'init_value': -2.4957404136657715, 'ave_value': -1.6898831598796287, 'soft_opc': nan} step=3984




2022-04-20 19:23.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.54 [info     ] FQE_20220420192343: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014867696417383402, 'time_algorithm_update': 0.0020356336271906473, 'loss': 0.009550343841280654, 'time_step': 0.002249683242246329, 'init_value': -2.6704630851745605, 'ave_value': -1.8133439458973772, 'soft_opc': nan} step=4150




2022-04-20 19:23.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.54 [info     ] FQE_20220420192343: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014405652701136577, 'time_algorithm_update': 0.002009199326296887, 'loss': 0.010061016362384859, 'time_step': 0.002218731914658144, 'init_value': -2.785440683364868, 'ave_value': -1.8903797721137872, 'soft_opc': nan} step=4316




2022-04-20 19:23.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.55 [info     ] FQE_20220420192343: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014647374670189548, 'time_algorithm_update': 0.002058635275047946, 'loss': 0.010774812195450068, 'time_step': 0.0022705865193562336, 'init_value': -2.7731032371520996, 'ave_value': -1.8521678065931475, 'soft_opc': nan} step=4482




2022-04-20 19:23.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.55 [info     ] FQE_20220420192343: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014919976154005672, 'time_algorithm_update': 0.0020230491477322868, 'loss': 0.011169473373426506, 'time_step': 0.0022390922868108175, 'init_value': -2.964759349822998, 'ave_value': -2.0073875206010836, 'soft_opc': nan} step=4648




2022-04-20 19:23.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.56 [info     ] FQE_20220420192343: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014748199876532498, 'time_algorithm_update': 0.002073460314647261, 'loss': 0.01190143292671608, 'time_step': 0.0022872786924063443, 'init_value': -3.002502918243408, 'ave_value': -2.0155243763649784, 'soft_opc': nan} step=4814




2022-04-20 19:23.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.56 [info     ] FQE_20220420192343: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014866547412182912, 'time_algorithm_update': 0.002092818179762507, 'loss': 0.012371892140370357, 'time_step': 0.0023121604000229433, 'init_value': -3.081833839416504, 'ave_value': -2.0544514460740864, 'soft_opc': nan} step=4980




2022-04-20 19:23.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.56 [info     ] FQE_20220420192343: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014816278434661497, 'time_algorithm_update': 0.0020256128655858786, 'loss': 0.012947574805563697, 'time_step': 0.002238631248474121, 'init_value': -3.161019802093506, 'ave_value': -2.1073243292065356, 'soft_opc': nan} step=5146




2022-04-20 19:23.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.57 [info     ] FQE_20220420192343: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015022093991199172, 'time_algorithm_update': 0.0020631996982068902, 'loss': 0.013278304531900998, 'time_step': 0.002283478357705725, 'init_value': -3.2816872596740723, 'ave_value': -2.190148213358076, 'soft_opc': nan} step=5312




2022-04-20 19:23.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.57 [info     ] FQE_20220420192343: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001488349523889013, 'time_algorithm_update': 0.002026802085968385, 'loss': 0.013949865173739213, 'time_step': 0.0022475762539599315, 'init_value': -3.2158968448638916, 'ave_value': -2.1375341805654604, 'soft_opc': nan} step=5478




2022-04-20 19:23.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.58 [info     ] FQE_20220420192343: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014773190739643142, 'time_algorithm_update': 0.0020412637526730456, 'loss': 0.01447577910295811, 'time_step': 0.0022558390376079514, 'init_value': -3.391707420349121, 'ave_value': -2.236761625017132, 'soft_opc': nan} step=5644




2022-04-20 19:23.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.58 [info     ] FQE_20220420192343: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015152506081454726, 'time_algorithm_update': 0.0020641203386237822, 'loss': 0.015270104530093496, 'time_step': 0.002282913908900985, 'init_value': -3.5572099685668945, 'ave_value': -2.366643654722888, 'soft_opc': nan} step=5810




2022-04-20 19:23.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.59 [info     ] FQE_20220420192343: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014519404215985034, 'time_algorithm_update': 0.0020738653389804334, 'loss': 0.015997039655599385, 'time_step': 0.0022864083209669732, 'init_value': -3.555983066558838, 'ave_value': -2.3548468660395425, 'soft_opc': nan} step=5976




2022-04-20 19:23.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.59 [info     ] FQE_20220420192343: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015073655599571136, 'time_algorithm_update': 0.002127607184720327, 'loss': 0.01642227117939998, 'time_step': 0.0023467411477881743, 'init_value': -3.6798486709594727, 'ave_value': -2.448910781566624, 'soft_opc': nan} step=6142




2022-04-20 19:23.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:23.59 [info     ] FQE_20220420192343: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014864680278732116, 'time_algorithm_update': 0.0020592758454472185, 'loss': 0.01690575134090473, 'time_step': 0.0022777160966252707, 'init_value': -3.6435084342956543, 'ave_value': -2.347236033522331, 'soft_opc': nan} step=6308




2022-04-20 19:23.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.00 [info     ] FQE_20220420192343: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014494126101574265, 'time_algorithm_update': 0.002038632530763925, 'loss': 0.01730285195215796, 'time_step': 0.0022511396063379496, 'init_value': -3.671419858932495, 'ave_value': -2.349744317586626, 'soft_opc': nan} step=6474




2022-04-20 19:24.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.00 [info     ] FQE_20220420192343: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014905900840299675, 'time_algorithm_update': 0.0020660549761301063, 'loss': 0.017609327691410815, 'time_step': 0.002280832773231598, 'init_value': -3.7876193523406982, 'ave_value': -2.455373466927726, 'soft_opc': nan} step=6640




2022-04-20 19:24.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.01 [info     ] FQE_20220420192343: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014925433928707996, 'time_algorithm_update': 0.002069331077208002, 'loss': 0.01826451022419181, 'time_step': 0.002287412264260901, 'init_value': -3.8605892658233643, 'ave_value': -2.5242713578246736, 'soft_opc': nan} step=6806




2022-04-20 19:24.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.01 [info     ] FQE_20220420192343: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014672796410250376, 'time_algorithm_update': 0.0020134463367691958, 'loss': 0.019740402369608497, 'time_step': 0.0022265322237129672, 'init_value': -3.9966208934783936, 'ave_value': -2.646069968619325, 'soft_opc': nan} step=6972




2022-04-20 19:24.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.02 [info     ] FQE_20220420192343: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015089741672377988, 'time_algorithm_update': 0.0020716549402259917, 'loss': 0.02020143586174439, 'time_step': 0.0022932018142148673, 'init_value': -4.1047139167785645, 'ave_value': -2.7065083067427884, 'soft_opc': nan} step=7138




2022-04-20 19:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.02 [info     ] FQE_20220420192343: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015023817498999905, 'time_algorithm_update': 0.0020400644784950347, 'loss': 0.021536523611593365, 'time_step': 0.0022598418844751566, 'init_value': -4.210826873779297, 'ave_value': -2.747829689353973, 'soft_opc': nan} step=7304




2022-04-20 19:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.02 [info     ] FQE_20220420192343: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014973979398428676, 'time_algorithm_update': 0.0021250765007662484, 'loss': 0.02205225438334003, 'time_step': 0.002341560570590467, 'init_value': -4.236917495727539, 'ave_value': -2.752665715993525, 'soft_opc': nan} step=7470




2022-04-20 19:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.03 [info     ] FQE_20220420192343: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015136420008647874, 'time_algorithm_update': 0.002088729157505265, 'loss': 0.02287342385052288, 'time_step': 0.00231034497180617, 'init_value': -4.368045806884766, 'ave_value': -2.847695824168287, 'soft_opc': nan} step=7636




2022-04-20 19:24.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.03 [info     ] FQE_20220420192343: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015332181769681265, 'time_algorithm_update': 0.0020801446524011083, 'loss': 0.023533948392786802, 'time_step': 0.002302001757794116, 'init_value': -4.450879096984863, 'ave_value': -2.9121265266258436, 'soft_opc': nan} step=7802




2022-04-20 19:24.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.04 [info     ] FQE_20220420192343: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014910640486751693, 'time_algorithm_update': 0.0020447308758655227, 'loss': 0.024832589316170616, 'time_step': 0.0022601377533142827, 'init_value': -4.517200469970703, 'ave_value': -2.992320239973498, 'soft_opc': nan} step=7968




2022-04-20 19:24.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.04 [info     ] FQE_20220420192343: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001501218382134495, 'time_algorithm_update': 0.00210256748888866, 'loss': 0.02737786245100335, 'time_step': 0.00232119014464229, 'init_value': -4.610271453857422, 'ave_value': -3.058670666867548, 'soft_opc': nan} step=8134




2022-04-20 19:24.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:24.05 [info     ] FQE_20220420192343: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015133116618696465, 'time_algorithm_update': 0.002090590545930058, 'loss': 0.026157960583209003, 'time_step': 0.0023126085120511344, 'init_value': -4.614907264709473, 'ave_value': -3.0198091928620596, 'soft_opc': nan} step=8300




2022-04-20 19:24.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192343/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

Read chunk # 38 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:24.05 [debug    ] RoundIterator is selected.
2022-04-20 19:24.05 [info     ] Directory is created at d3rlpy_logs/FQE_20220420192405
2022-04-20 19:24.05 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:24.05 [debug    ] Building models...
2022-04-20 19:24.05 [debug    ] Models have been built.
2022-04-20 19:24.05 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420192405/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'max

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:24.06 [info     ] FQE_20220420192405: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001513798569524011, 'time_algorithm_update': 0.002052250989647799, 'loss': 0.025029514599483202, 'time_step': 0.0022735976895620655, 'init_value': -1.3627147674560547, 'ave_value': -1.3470870931809014, 'soft_opc': nan} step=344




2022-04-20 19:24.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.07 [info     ] FQE_20220420192405: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001492909220762031, 'time_algorithm_update': 0.002045958541160406, 'loss': 0.02442519104129873, 'time_step': 0.0022644109504167424, 'init_value': -2.2538418769836426, 'ave_value': -2.1752266504743076, 'soft_opc': nan} step=688




2022-04-20 19:24.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.07 [info     ] FQE_20220420192405: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015197936878647914, 'time_algorithm_update': 0.002075717892757682, 'loss': 0.027388844241084922, 'time_step': 0.002300168192663858, 'init_value': -3.39756441116333, 'ave_value': -3.244212678544693, 'soft_opc': nan} step=1032




2022-04-20 19:24.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.08 [info     ] FQE_20220420192405: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015275076378223508, 'time_algorithm_update': 0.001982030480407005, 'loss': 0.029497966939193564, 'time_step': 0.0022022065728209738, 'init_value': -4.163058280944824, 'ave_value': -3.954893977698442, 'soft_opc': nan} step=1376




2022-04-20 19:24.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.09 [info     ] FQE_20220420192405: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015122945918593297, 'time_algorithm_update': 0.0020243220551069393, 'loss': 0.03490131190804721, 'time_step': 0.0022466827270596528, 'init_value': -5.041135787963867, 'ave_value': -4.846796081758834, 'soft_opc': nan} step=1720




2022-04-20 19:24.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.10 [info     ] FQE_20220420192405: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015696259432060775, 'time_algorithm_update': 0.002040238574493763, 'loss': 0.03916343707905346, 'time_step': 0.0022672144479529803, 'init_value': -5.7169294357299805, 'ave_value': -5.529530670124668, 'soft_opc': nan} step=2064




2022-04-20 19:24.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.11 [info     ] FQE_20220420192405: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015384998432425566, 'time_algorithm_update': 0.002060335042864777, 'loss': 0.04675859234804764, 'time_step': 0.0022848428681839345, 'init_value': -6.500165939331055, 'ave_value': -6.351988135976297, 'soft_opc': nan} step=2408




2022-04-20 19:24.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.12 [info     ] FQE_20220420192405: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015478217324545217, 'time_algorithm_update': 0.002019819132117338, 'loss': 0.0559735678212154, 'time_step': 0.002245277859443842, 'init_value': -7.023956775665283, 'ave_value': -6.941384270404642, 'soft_opc': nan} step=2752




2022-04-20 19:24.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.12 [info     ] FQE_20220420192405: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015505801799685457, 'time_algorithm_update': 0.0021058050699012225, 'loss': 0.06565099224437375, 'time_step': 0.0023323526216107743, 'init_value': -7.627935409545898, 'ave_value': -7.637554722641771, 'soft_opc': nan} step=3096




2022-04-20 19:24.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.13 [info     ] FQE_20220420192405: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015056826347528503, 'time_algorithm_update': 0.0019793032213698985, 'loss': 0.07989695269971826, 'time_step': 0.002197378596594167, 'init_value': -8.156516075134277, 'ave_value': -8.294286269721416, 'soft_opc': nan} step=3440




2022-04-20 19:24.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.14 [info     ] FQE_20220420192405: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015415840370710507, 'time_algorithm_update': 0.0020277527875678485, 'loss': 0.09322897626119557, 'time_step': 0.0022551209427589592, 'init_value': -8.671919822692871, 'ave_value': -8.98163500250661, 'soft_opc': nan} step=3784




2022-04-20 19:24.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.15 [info     ] FQE_20220420192405: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001534022564111754, 'time_algorithm_update': 0.0020737377710120623, 'loss': 0.10492181609557985, 'time_step': 0.002298982337463734, 'init_value': -9.05249309539795, 'ave_value': -9.555516327390455, 'soft_opc': nan} step=4128




2022-04-20 19:24.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.16 [info     ] FQE_20220420192405: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001531215601189192, 'time_algorithm_update': 0.0020231348137522854, 'loss': 0.115747997039106, 'time_step': 0.0022467125293820405, 'init_value': -9.393199920654297, 'ave_value': -10.154246991999376, 'soft_opc': nan} step=4472




2022-04-20 19:24.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.17 [info     ] FQE_20220420192405: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.000155215346535971, 'time_algorithm_update': 0.002059666223304216, 'loss': 0.13227187648556346, 'time_step': 0.0022850958413855975, 'init_value': -9.759801864624023, 'ave_value': -10.823831756781736, 'soft_opc': nan} step=4816




2022-04-20 19:24.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.18 [info     ] FQE_20220420192405: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001526641291241313, 'time_algorithm_update': 0.0020462919113247895, 'loss': 0.14446914996843524, 'time_step': 0.0022696485353070634, 'init_value': -9.960049629211426, 'ave_value': -11.376285225029212, 'soft_opc': nan} step=5160




2022-04-20 19:24.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.18 [info     ] FQE_20220420192405: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015191907106443893, 'time_algorithm_update': 0.002036284568697907, 'loss': 0.16285458881797832, 'time_step': 0.0022558909516001858, 'init_value': -10.560521125793457, 'ave_value': -12.269829672909351, 'soft_opc': nan} step=5504




2022-04-20 19:24.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.19 [info     ] FQE_20220420192405: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001512696576672931, 'time_algorithm_update': 0.0020274707051210626, 'loss': 0.18618867461309704, 'time_step': 0.002248603937237762, 'init_value': -10.437349319458008, 'ave_value': -12.399514023439753, 'soft_opc': nan} step=5848




2022-04-20 19:24.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.20 [info     ] FQE_20220420192405: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015386315279228744, 'time_algorithm_update': 0.002037755278653877, 'loss': 0.19477775509382578, 'time_step': 0.0022641586702923443, 'init_value': -10.797708511352539, 'ave_value': -13.19055416328111, 'soft_opc': nan} step=6192




2022-04-20 19:24.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.21 [info     ] FQE_20220420192405: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015425682067871094, 'time_algorithm_update': 0.002058071452517842, 'loss': 0.21518108739279385, 'time_step': 0.0022843771202619685, 'init_value': -11.328588485717773, 'ave_value': -14.042716058819378, 'soft_opc': nan} step=6536




2022-04-20 19:24.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.22 [info     ] FQE_20220420192405: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001544009807497956, 'time_algorithm_update': 0.0020921140216117683, 'loss': 0.22929402827393525, 'time_step': 0.002319026825039886, 'init_value': -11.60731029510498, 'ave_value': -14.455846126087488, 'soft_opc': nan} step=6880




2022-04-20 19:24.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.23 [info     ] FQE_20220420192405: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015466850857402003, 'time_algorithm_update': 0.0020322862059571024, 'loss': 0.25575727392746095, 'time_step': 0.00225364884664846, 'init_value': -12.08216381072998, 'ave_value': -15.268303002260085, 'soft_opc': nan} step=7224




2022-04-20 19:24.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.23 [info     ] FQE_20220420192405: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015282076458598292, 'time_algorithm_update': 0.002012678357057793, 'loss': 0.2754288467546102, 'time_step': 0.002235660719317059, 'init_value': -12.508445739746094, 'ave_value': -15.920183913852718, 'soft_opc': nan} step=7568




2022-04-20 19:24.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.24 [info     ] FQE_20220420192405: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015170075172601745, 'time_algorithm_update': 0.002090348060740981, 'loss': 0.313459345675615, 'time_step': 0.0023096065188563148, 'init_value': -13.298195838928223, 'ave_value': -16.992290029608487, 'soft_opc': nan} step=7912




2022-04-20 19:24.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.25 [info     ] FQE_20220420192405: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015436632688655409, 'time_algorithm_update': 0.0020295222138249597, 'loss': 0.3404572152987469, 'time_step': 0.002254716185636299, 'init_value': -13.44991683959961, 'ave_value': -17.24274812005009, 'soft_opc': nan} step=8256




2022-04-20 19:24.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.26 [info     ] FQE_20220420192405: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015389156896014546, 'time_algorithm_update': 0.002051205829132435, 'loss': 0.3614895043266547, 'time_step': 0.0022744425507478937, 'init_value': -14.226701736450195, 'ave_value': -18.058090995688744, 'soft_opc': nan} step=8600




2022-04-20 19:24.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.27 [info     ] FQE_20220420192405: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.000155182771904524, 'time_algorithm_update': 0.0020735645016958545, 'loss': 0.402917361948206, 'time_step': 0.0022988243158473527, 'init_value': -14.853998184204102, 'ave_value': -18.55795502359056, 'soft_opc': nan} step=8944




2022-04-20 19:24.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.28 [info     ] FQE_20220420192405: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015380424122477687, 'time_algorithm_update': 0.0020830354025197584, 'loss': 0.4362168393942506, 'time_step': 0.0023082529389581016, 'init_value': -15.30518627166748, 'ave_value': -18.92399290833341, 'soft_opc': nan} step=9288




2022-04-20 19:24.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.29 [info     ] FQE_20220420192405: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015468098396478699, 'time_algorithm_update': 0.002096770114676897, 'loss': 0.48333942695239257, 'time_step': 0.002325622148292009, 'init_value': -16.239168167114258, 'ave_value': -19.72189850430319, 'soft_opc': nan} step=9632




2022-04-20 19:24.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.30 [info     ] FQE_20220420192405: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015716705211373262, 'time_algorithm_update': 0.0020520735618680024, 'loss': 0.5199873658590192, 'time_step': 0.0022817427335783493, 'init_value': -16.8701114654541, 'ave_value': -20.502146984775433, 'soft_opc': nan} step=9976




2022-04-20 19:24.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.30 [info     ] FQE_20220420192405: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015252759290295979, 'time_algorithm_update': 0.002044837142145911, 'loss': 0.5587399030039304, 'time_step': 0.0022681722807329757, 'init_value': -17.314476013183594, 'ave_value': -20.733247281583907, 'soft_opc': nan} step=10320




2022-04-20 19:24.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.31 [info     ] FQE_20220420192405: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001492029012635697, 'time_algorithm_update': 0.0020399828289830407, 'loss': 0.590718480076121, 'time_step': 0.0022579237472179323, 'init_value': -17.902446746826172, 'ave_value': -21.145632502966276, 'soft_opc': nan} step=10664




2022-04-20 19:24.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.32 [info     ] FQE_20220420192405: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015422979066538256, 'time_algorithm_update': 0.002093611068503801, 'loss': 0.6113081911557116, 'time_step': 0.002318752366443013, 'init_value': -18.06214141845703, 'ave_value': -21.335896892599585, 'soft_opc': nan} step=11008




2022-04-20 19:24.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.33 [info     ] FQE_20220420192405: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00014063646627026936, 'time_algorithm_update': 0.0018850613472073576, 'loss': 0.6642343020824673, 'time_step': 0.0020885384360025097, 'init_value': -18.49211883544922, 'ave_value': -21.539594620170895, 'soft_opc': nan} step=11352




2022-04-20 19:24.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.34 [info     ] FQE_20220420192405: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.000136141860207846, 'time_algorithm_update': 0.0018201560475105464, 'loss': 0.7175983660421226, 'time_step': 0.002018694267716519, 'init_value': -19.134071350097656, 'ave_value': -21.882194351680315, 'soft_opc': nan} step=11696




2022-04-20 19:24.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.34 [info     ] FQE_20220420192405: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001373748446619788, 'time_algorithm_update': 0.0018372078274571618, 'loss': 0.7582285246205365, 'time_step': 0.0020370019036670063, 'init_value': -19.69253158569336, 'ave_value': -22.090294744486727, 'soft_opc': nan} step=12040




2022-04-20 19:24.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.35 [info     ] FQE_20220420192405: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001372390015180721, 'time_algorithm_update': 0.0018536545509515807, 'loss': 0.805852560651337, 'time_step': 0.00205446952997252, 'init_value': -20.78849220275879, 'ave_value': -22.850385995058193, 'soft_opc': nan} step=12384




2022-04-20 19:24.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.36 [info     ] FQE_20220420192405: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013284766396810842, 'time_algorithm_update': 0.0017973447954931924, 'loss': 0.8701163283440956, 'time_step': 0.0019925382248190946, 'init_value': -21.99481201171875, 'ave_value': -23.791282403590117, 'soft_opc': nan} step=12728




2022-04-20 19:24.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.37 [info     ] FQE_20220420192405: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00014764070510864258, 'time_algorithm_update': 0.00203904856083005, 'loss': 0.9197360395107331, 'time_step': 0.0022580956303796104, 'init_value': -22.75240707397461, 'ave_value': -24.431793646243094, 'soft_opc': nan} step=13072




2022-04-20 19:24.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.38 [info     ] FQE_20220420192405: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00014652692994406057, 'time_algorithm_update': 0.001990631569263547, 'loss': 0.9583490966537664, 'time_step': 0.0022035511427147443, 'init_value': -23.121116638183594, 'ave_value': -24.507916312032748, 'soft_opc': nan} step=13416




2022-04-20 19:24.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.38 [info     ] FQE_20220420192405: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013823980508848678, 'time_algorithm_update': 0.001834004424339117, 'loss': 1.0071804711032053, 'time_step': 0.0020360953586046086, 'init_value': -23.635112762451172, 'ave_value': -24.87747404372743, 'soft_opc': nan} step=13760




2022-04-20 19:24.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.39 [info     ] FQE_20220420192405: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00014901230501574139, 'time_algorithm_update': 0.0020431543505469033, 'loss': 1.0359640991867543, 'time_step': 0.0022626643957093704, 'init_value': -24.09548568725586, 'ave_value': -25.020840827051366, 'soft_opc': nan} step=14104




2022-04-20 19:24.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.40 [info     ] FQE_20220420192405: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001533814640932305, 'time_algorithm_update': 0.0020293233006499533, 'loss': 1.055222186736416, 'time_step': 0.0022528074508489566, 'init_value': -24.662296295166016, 'ave_value': -25.42972382430264, 'soft_opc': nan} step=14448




2022-04-20 19:24.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.41 [info     ] FQE_20220420192405: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015095292135726575, 'time_algorithm_update': 0.001996440249820088, 'loss': 1.0727664112589907, 'time_step': 0.002214939095253168, 'init_value': -24.231243133544922, 'ave_value': -24.93331969050703, 'soft_opc': nan} step=14792




2022-04-20 19:24.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.42 [info     ] FQE_20220420192405: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001508642074673675, 'time_algorithm_update': 0.002036521601122479, 'loss': 1.058232846615706, 'time_step': 0.0022565930388694587, 'init_value': -24.571035385131836, 'ave_value': -25.188610090108522, 'soft_opc': nan} step=15136




2022-04-20 19:24.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.43 [info     ] FQE_20220420192405: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.000154782866322717, 'time_algorithm_update': 0.0020659683748733165, 'loss': 1.082868471340991, 'time_step': 0.0022908885811650475, 'init_value': -24.337215423583984, 'ave_value': -24.939201326665746, 'soft_opc': nan} step=15480




2022-04-20 19:24.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.43 [info     ] FQE_20220420192405: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015183728794718898, 'time_algorithm_update': 0.002022630946580754, 'loss': 1.1153297453555604, 'time_step': 0.0022442188373831815, 'init_value': -26.138057708740234, 'ave_value': -26.16994743716876, 'soft_opc': nan} step=15824




2022-04-20 19:24.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.44 [info     ] FQE_20220420192405: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001510603483333144, 'time_algorithm_update': 0.0020034895386806754, 'loss': 1.1323828745828293, 'time_step': 0.002223243547040363, 'init_value': -26.0728816986084, 'ave_value': -26.039034398035188, 'soft_opc': nan} step=16168




2022-04-20 19:24.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.45 [info     ] FQE_20220420192405: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00014890765034875205, 'time_algorithm_update': 0.0020254379095033157, 'loss': 1.1363052765210702, 'time_step': 0.002242511788079905, 'init_value': -26.281494140625, 'ave_value': -26.074916289558924, 'soft_opc': nan} step=16512




2022-04-20 19:24.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.46 [info     ] FQE_20220420192405: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00014905943426974985, 'time_algorithm_update': 0.0020023674465889153, 'loss': 1.1385827028614923, 'time_step': 0.0022205551003300866, 'init_value': -26.387252807617188, 'ave_value': -26.17852001043859, 'soft_opc': nan} step=16856




2022-04-20 19:24.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:24.47 [info     ] FQE_20220420192405: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015074361202328703, 'time_algorithm_update': 0.0020205919132676234, 'loss': 1.1580449151319214, 'time_step': 0.002239198878754017, 'init_value': -26.921207427978516, 'ave_value': -26.726364244515736, 'soft_opc': nan} step=17200




2022-04-20 19:24.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420192405/model_17200.pt
search iteration:  27
using hyper params:  [0.009967957025118471, 0.002713761410718981, 1.3009221444138546e-05, 5]
2022-04-20 19:24.47 [debug    ] RoundIterator is selected.
2022-04-20 19:24.47 [info     ] Directory is created at d3rlpy_logs/CQL_20220420192447
2022-04-20 19:24.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:24.47 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:24.47 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420192447/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009967957025118471, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:24.53 [info     ] CQL_20220420192447: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003463113517091985, 'time_algorithm_update': 0.01739814337234051, 'temp_loss': 4.594645100727416, 'temp': 0.9978819207141274, 'alpha_loss': -17.72905397415161, 'alpha': 1.0175526215319048, 'critic_loss': 51.10793491831997, 'actor_loss': 2.9934256557832684, 'time_step': 0.017839280485409743, 'td_error': 4.058148603416233, 'init_value': -6.230286121368408, 'ave_value': -4.723884746445729} step=342
2022-04-20 19:24.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:24.59 [info     ] CQL_20220420192447: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003393707219620197, 'time_algorithm_update': 0.0170900821685791, 'temp_loss': 4.479051117311444, 'temp': 0.9935215624452335, 'alpha_loss': -10.889763971518354, 'alpha': 1.046897458402734, 'critic_loss': 24.408492629291022, 'actor_loss': 7.145544773653934, 'time_step': 0.01752473178662752, 'td_error': 4.156718296172354, 'init_value': -15.495229721069336, 'ave_value': -9.360217695085517} step=684
2022-04-20 19:24.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.05 [info     ] CQL_20220420192447: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003240673165572317, 'time_algorithm_update': 0.016193714058190062, 'temp_loss': 4.024378792584291, 'temp': 0.989355989366944, 'alpha_loss': -7.486454149435835, 'alpha': 1.0695553362718102, 'critic_loss': 26.431456911633585, 'actor_loss': 12.7410306149756, 'time_step': 0.016606938072115357, 'td_error': 5.88512967823755, 'init_value': -24.137052536010742, 'ave_value': -14.35578878985332} step=1026
2022-04-20 19:25.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.12 [info     ] CQL_20220420192447: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003542237811618381, 'time_algorithm_update': 0.017578841649998, 'temp_loss': 3.701340723455998, 'temp': 0.9854287282068129, 'alpha_loss': -5.837639015320449, 'alpha': 1.0904412910952206, 'critic_loss': 33.24213622466863, 'actor_loss': 18.183721960636607, 'time_step': 0.018029515506231296, 'td_error': 7.673917049971407, 'init_value': -31.293603897094727, 'ave_value': -18.589315113294703} step=1368
2022-04-20 19:25.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.18 [info     ] CQL_20220420192447: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003452133714107045, 'time_algorithm_update': 0.017457618350871125, 'temp_loss': 3.4483548883806194, 'temp': 0.9815938847455364, 'alpha_loss': -4.537222799501921, 'alpha': 1.1104092918641386, 'critic_loss': 42.966129988954776, 'actor_loss': 23.362169912684035, 'time_step': 0.01789974260051348, 'td_error': 10.495587079513458, 'init_value': -38.08720779418945, 'ave_value': -22.372925641929715} step=1710
2022-04-20 19:25.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.24 [info     ] CQL_20220420192447: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003474678909569456, 'time_algorithm_update': 0.017474260246544554, 'temp_loss': 3.2401263881147955, 'temp': 0.9778155586872882, 'alpha_loss': -3.307660297517888, 'alpha': 1.1285236648648802, 'critic_loss': 54.242125260202506, 'actor_loss': 28.141250939397086, 'time_step': 0.017919879210622686, 'td_error': 13.826963579546904, 'init_value': -45.57188034057617, 'ave_value': -26.579069412736942} step=2052
2022-04-20 19:25.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.31 [info     ] CQL_20220420192447: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035135857543052985, 'time_algorithm_update': 0.01747367953696446, 'temp_loss': 2.9969563637560572, 'temp': 0.9741231739869591, 'alpha_loss': -2.058991587827015, 'alpha': 1.1434234046099478, 'critic_loss': 66.73796773096274, 'actor_loss': 32.6278146554155, 'time_step': 0.01792188206611321, 'td_error': 17.341542156340648, 'init_value': -52.45728302001953, 'ave_value': -30.653499555038035} step=2394
2022-04-20 19:25.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.37 [info     ] CQL_20220420192447: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034604225939477396, 'time_algorithm_update': 0.01746493543100636, 'temp_loss': 2.80915213467782, 'temp': 0.97049631309091, 'alpha_loss': -0.817407883232601, 'alpha': 1.153000999960983, 'critic_loss': 80.54695119355854, 'actor_loss': 36.8230226304796, 'time_step': 0.017908716062356157, 'td_error': 21.02200747445872, 'init_value': -58.46596145629883, 'ave_value': -33.95420852387609} step=2736
2022-04-20 19:25.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.44 [info     ] CQL_20220420192447: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00035600006928917957, 'time_algorithm_update': 0.01770009144007811, 'temp_loss': 2.6226023282223974, 'temp': 0.9669290589310272, 'alpha_loss': 0.3770787558812452, 'alpha': 1.1544022605433102, 'critic_loss': 94.46910543609084, 'actor_loss': 40.72254702919408, 'time_step': 0.01815624892363074, 'td_error': 24.392061153178975, 'init_value': -64.32586669921875, 'ave_value': -37.54481106249196} step=3078
2022-04-20 19:25.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.50 [info     ] CQL_20220420192447: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003551370219180458, 'time_algorithm_update': 0.01763140248973467, 'temp_loss': 2.4627019435341597, 'temp': 0.9633957379394107, 'alpha_loss': 1.4703631423140076, 'alpha': 1.1460206533036037, 'critic_loss': 109.17480185436226, 'actor_loss': 44.47560327094898, 'time_step': 0.018087648508841533, 'td_error': 28.207314318794964, 'init_value': -69.87837219238281, 'ave_value': -40.77357519461899} step=3420
2022-04-20 19:25.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:25.56 [info     ] CQL_20220420192447: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00035554554030212045, 'time_algorithm_update': 0.01770850231772975, 'temp_loss': 2.303324138211925, 'temp': 0.9598996442312385, 'alpha_loss': 2.497703595642458, 'alpha': 1.1249955153604696, 'critic_loss': 124.54652219627336, 'actor_loss': 48.00597334744637, 'time_step': 0.018165753598798785, 'td_error': 31.568112773740378, 'init_value': -75.6501693725586, 'ave_value': -43.4021309521593} step=3762
2022-04-20 19:25.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.03 [info     ] CQL_20220420192447: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003564636609707659, 'time_algorithm_update': 0.017559878310264898, 'temp_loss': 2.1606475310018887, 'temp': 0.9564435755648808, 'alpha_loss': 3.362374053189629, 'alpha': 1.0926716784287613, 'critic_loss': 140.29225165383858, 'actor_loss': 51.50192483823899, 'time_step': 0.01801695740013792, 'td_error': 35.30584696913429, 'init_value': -80.7856674194336, 'ave_value': -47.13612175000412} step=4104
2022-04-20 19:26.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.09 [info     ] CQL_20220420192447: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003526712718762849, 'time_algorithm_update': 0.01776773608915987, 'temp_loss': 2.0380271791714675, 'temp': 0.9530058813373945, 'alpha_loss': 4.0877137598282065, 'alpha': 1.0533360354384484, 'critic_loss': 156.7911556980066, 'actor_loss': 54.65900016249272, 'time_step': 0.018215569836354396, 'td_error': 38.44271147813441, 'init_value': -85.88028717041016, 'ave_value': -50.439339546914304} step=4446
2022-04-20 19:26.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.16 [info     ] CQL_20220420192447: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003507255810743187, 'time_algorithm_update': 0.01764015635551765, 'temp_loss': 1.9332006409851432, 'temp': 0.9495677409464853, 'alpha_loss': 4.656443836396201, 'alpha': 1.012050682863994, 'critic_loss': 172.74336032978974, 'actor_loss': 57.754279621860434, 'time_step': 0.018088710238361918, 'td_error': 42.90853566135849, 'init_value': -91.25190734863281, 'ave_value': -52.93594445454779} step=4788
2022-04-20 19:26.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.22 [info     ] CQL_20220420192447: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003528873822842425, 'time_algorithm_update': 0.017650747856898616, 'temp_loss': 1.821130865498593, 'temp': 0.9461147398279425, 'alpha_loss': 5.126473293666951, 'alpha': 0.9713774821911639, 'critic_loss': 189.58740292376245, 'actor_loss': 60.64211784050479, 'time_step': 0.01810185602533887, 'td_error': 44.82128882544923, 'init_value': -94.27314758300781, 'ave_value': -54.34434725684491} step=5130
2022-04-20 19:26.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.29 [info     ] CQL_20220420192447: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035208080247131705, 'time_algorithm_update': 0.017701807774995502, 'temp_loss': 1.7156756813066047, 'temp': 0.9427374393967857, 'alpha_loss': 5.4705493924213435, 'alpha': 0.9324005795152563, 'critic_loss': 207.4010506791678, 'actor_loss': 63.321838055437766, 'time_step': 0.018151494494655675, 'td_error': 47.06412627212525, 'init_value': -98.40601348876953, 'ave_value': -56.66722030243895} step=5472
2022-04-20 19:26.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.35 [info     ] CQL_20220420192447: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003576606337787115, 'time_algorithm_update': 0.017690062522888184, 'temp_loss': 1.6341981976701503, 'temp': 0.9393177938740156, 'alpha_loss': 5.797440187275758, 'alpha': 0.8955905535067731, 'critic_loss': 226.21290396528636, 'actor_loss': 65.89706050582797, 'time_step': 0.018143716611360248, 'td_error': 48.951071595789884, 'init_value': -101.5916748046875, 'ave_value': -58.37909062001082} step=5814
2022-04-20 19:26.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.41 [info     ] CQL_20220420192447: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003522125601071363, 'time_algorithm_update': 0.017593049863625688, 'temp_loss': 1.5590360751974652, 'temp': 0.9358893707830306, 'alpha_loss': 6.008338850841188, 'alpha': 0.8611020527736485, 'critic_loss': 243.8767865275779, 'actor_loss': 68.26713973597477, 'time_step': 0.01804325594539531, 'td_error': 51.77326325329167, 'init_value': -106.46763610839844, 'ave_value': -61.039686790906096} step=6156
2022-04-20 19:26.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.48 [info     ] CQL_20220420192447: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003507325523778012, 'time_algorithm_update': 0.017700679818092036, 'temp_loss': 1.4748314129330262, 'temp': 0.9324746754085809, 'alpha_loss': 6.194900382331937, 'alpha': 0.8288663329436765, 'critic_loss': 262.52301025390625, 'actor_loss': 70.53291194759615, 'time_step': 0.01814951603872734, 'td_error': 54.06420479009231, 'init_value': -109.2731704711914, 'ave_value': -63.34813923473562} step=6498
2022-04-20 19:26.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:26.54 [info     ] CQL_20220420192447: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003501253518444753, 'time_algorithm_update': 0.017741063882035817, 'temp_loss': 1.3742349365649864, 'temp': 0.929094207914252, 'alpha_loss': 6.364297125771729, 'alpha': 0.7982957866805339, 'critic_loss': 279.70419141981336, 'actor_loss': 72.66536918997068, 'time_step': 0.018188780511331836, 'td_error': 56.278206023144485, 'init_value': -113.44538879394531, 'ave_value': -66.31204388316687} step=6840
2022-04-20 19:26.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.01 [info     ] CQL_20220420192447: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003525109318961874, 'time_algorithm_update': 0.017725169310095715, 'temp_loss': 1.3083987532303347, 'temp': 0.9257351269847468, 'alpha_loss': 6.495695296783893, 'alpha': 0.7691388288785143, 'critic_loss': 299.0871474505865, 'actor_loss': 74.85991521467243, 'time_step': 0.018176613495363828, 'td_error': 55.62031886769765, 'init_value': -116.57868957519531, 'ave_value': -67.15775729080846} step=7182
2022-04-20 19:27.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.07 [info     ] CQL_20220420192447: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00034876037062260144, 'time_algorithm_update': 0.017616888235884105, 'temp_loss': 1.2410189457107008, 'temp': 0.9224080971458501, 'alpha_loss': 6.50552317203834, 'alpha': 0.7416170260362458, 'critic_loss': 317.16029750533966, 'actor_loss': 76.80594369542528, 'time_step': 0.018062832062704523, 'td_error': 58.22361830992627, 'init_value': -119.58442687988281, 'ave_value': -69.22500572697268} step=7524
2022-04-20 19:27.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.14 [info     ] CQL_20220420192447: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003555141694364492, 'time_algorithm_update': 0.017796675364176433, 'temp_loss': 1.1958645117736002, 'temp': 0.9190379205154396, 'alpha_loss': 6.535321584221912, 'alpha': 0.7157254409023196, 'critic_loss': 337.53429452996505, 'actor_loss': 78.78295443350808, 'time_step': 0.01825305035239772, 'td_error': 60.08873900254127, 'init_value': -122.80174255371094, 'ave_value': -70.24709701932765} step=7866
2022-04-20 19:27.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.20 [info     ] CQL_20220420192447: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035059033778675816, 'time_algorithm_update': 0.017632489315947596, 'temp_loss': 1.1280763300887324, 'temp': 0.9156873928524597, 'alpha_loss': 6.592047970894485, 'alpha': 0.6909389678846326, 'critic_loss': 355.40580151652733, 'actor_loss': 80.46521404333282, 'time_step': 0.0180819641079819, 'td_error': 61.3909475803432, 'init_value': -123.66749572753906, 'ave_value': -71.28366153338337} step=8208
2022-04-20 19:27.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.27 [info     ] CQL_20220420192447: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00034690182111416645, 'time_algorithm_update': 0.017704583748042235, 'temp_loss': 1.0531606195788634, 'temp': 0.9123705301368445, 'alpha_loss': 6.583977815003423, 'alpha': 0.6672608270282634, 'critic_loss': 372.39594764040226, 'actor_loss': 82.15153173257036, 'time_step': 0.018147821314850744, 'td_error': 62.271232481617794, 'init_value': -129.1461639404297, 'ave_value': -74.38866728791783} step=8550
2022-04-20 19:27.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.33 [info     ] CQL_20220420192447: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00034961086964746665, 'time_algorithm_update': 0.0175689214851424, 'temp_loss': 0.9696943071284141, 'temp': 0.9091471475467348, 'alpha_loss': 6.448725555375305, 'alpha': 0.6447457634566123, 'critic_loss': 390.65324062771265, 'actor_loss': 83.77747041579576, 'time_step': 0.018012194605598674, 'td_error': 60.44969254604031, 'init_value': -127.69830322265625, 'ave_value': -73.81966700511741} step=8892
2022-04-20 19:27.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.39 [info     ] CQL_20220420192447: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003509674853051615, 'time_algorithm_update': 0.017576784418340315, 'temp_loss': 0.9227563816596541, 'temp': 0.9059795629211337, 'alpha_loss': 6.385460194091351, 'alpha': 0.6233512145733973, 'critic_loss': 409.4592058505231, 'actor_loss': 85.18259568242301, 'time_step': 0.018026009637709946, 'td_error': 62.59007559665183, 'init_value': -131.98483276367188, 'ave_value': -76.1794488430493} step=9234
2022-04-20 19:27.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.46 [info     ] CQL_20220420192447: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00034890119095294794, 'time_algorithm_update': 0.01741858532554225, 'temp_loss': 0.8797131658297533, 'temp': 0.9027533010084029, 'alpha_loss': 6.23637822984952, 'alpha': 0.6028636259990826, 'critic_loss': 425.5539979098136, 'actor_loss': 86.39034933792918, 'time_step': 0.01786286301083035, 'td_error': 62.37725825832435, 'init_value': -133.49017333984375, 'ave_value': -76.8659148459029} step=9576
2022-04-20 19:27.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.52 [info     ] CQL_20220420192447: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003537120874862225, 'time_algorithm_update': 0.017545500693962587, 'temp_loss': 0.8388732214028026, 'temp': 0.8995679587300061, 'alpha_loss': 6.129857394430372, 'alpha': 0.5834322338215789, 'critic_loss': 439.1537495217128, 'actor_loss': 87.56970926474409, 'time_step': 0.01799915896521674, 'td_error': 64.61787732068068, 'init_value': -136.18881225585938, 'ave_value': -78.65629324158972} step=9918
2022-04-20 19:27.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:27.59 [info     ] CQL_20220420192447: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00035359636384841295, 'time_algorithm_update': 0.017583184074937253, 'temp_loss': 0.7841985631538065, 'temp': 0.8963572675721687, 'alpha_loss': 6.008269056242112, 'alpha': 0.5647625421222887, 'critic_loss': 451.99403506273416, 'actor_loss': 88.63545186896073, 'time_step': 0.018036862562971507, 'td_error': 61.71679207051416, 'init_value': -134.94546508789062, 'ave_value': -79.63206146239563} step=10260
2022-04-20 19:27.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.05 [info     ] CQL_20220420192447: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035421192994591785, 'time_algorithm_update': 0.017705703339381526, 'temp_loss': 0.7377337974806627, 'temp': 0.8932911093123475, 'alpha_loss': 5.79027929919505, 'alpha': 0.5468077854803431, 'critic_loss': 462.3858373095418, 'actor_loss': 89.3995619433665, 'time_step': 0.01816103472347148, 'td_error': 62.5340003811486, 'init_value': -135.97256469726562, 'ave_value': -79.24453202259097} step=10602
2022-04-20 19:28.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.11 [info     ] CQL_20220420192447: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035397560275786106, 'time_algorithm_update': 0.0176027462496395, 'temp_loss': 0.7330805055187111, 'temp': 0.8901080252134312, 'alpha_loss': 5.699557726843315, 'alpha': 0.5297063651837801, 'critic_loss': 472.2690881204884, 'actor_loss': 90.24160458748801, 'time_step': 0.018053742180093688, 'td_error': 61.93651003037671, 'init_value': -138.59938049316406, 'ave_value': -80.83152255391282} step=10944
2022-04-20 19:28.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.18 [info     ] CQL_20220420192447: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003491340324892635, 'time_algorithm_update': 0.017695910749379654, 'temp_loss': 0.656534457034622, 'temp': 0.8870082556852821, 'alpha_loss': 5.55779500872071, 'alpha': 0.5129447492242557, 'critic_loss': 482.12684934738786, 'actor_loss': 90.9697486698976, 'time_step': 0.01814446463222392, 'td_error': 61.03930608329469, 'init_value': -138.3651885986328, 'ave_value': -80.71333814932352} step=11286
2022-04-20 19:28.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.24 [info     ] CQL_20220420192447: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000354593957376759, 'time_algorithm_update': 0.017843558774356953, 'temp_loss': 0.6949984348918262, 'temp': 0.8837545866157577, 'alpha_loss': 5.30803174233576, 'alpha': 0.49714552964034836, 'critic_loss': 492.36907958984375, 'actor_loss': 91.79952110602842, 'time_step': 0.018297709219636974, 'td_error': 61.82531912798651, 'init_value': -140.91400146484375, 'ave_value': -82.77876915416537} step=11628
2022-04-20 19:28.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.31 [info     ] CQL_20220420192447: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035335864239965963, 'time_algorithm_update': 0.017712760389896862, 'temp_loss': 0.6304960907899846, 'temp': 0.8805785067597328, 'alpha_loss': 5.201769078684132, 'alpha': 0.4818674759906635, 'critic_loss': 499.62339256242007, 'actor_loss': 92.46547607511107, 'time_step': 0.018164904494034618, 'td_error': 61.19831524629991, 'init_value': -141.89138793945312, 'ave_value': -83.07716625543395} step=11970
2022-04-20 19:28.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.37 [info     ] CQL_20220420192447: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035479682230809976, 'time_algorithm_update': 0.017783485657987538, 'temp_loss': 0.6262753868355737, 'temp': 0.8773491204830638, 'alpha_loss': 5.058358727142825, 'alpha': 0.4670490126686487, 'critic_loss': 507.69561196488945, 'actor_loss': 93.09501603332876, 'time_step': 0.018237338428608856, 'td_error': 61.945494829618156, 'init_value': -141.18966674804688, 'ave_value': -82.71350127789955} step=12312
2022-04-20 19:28.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.44 [info     ] CQL_20220420192447: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003537127846165707, 'time_algorithm_update': 0.017890231651172303, 'temp_loss': 0.6077329614880489, 'temp': 0.8741563201299187, 'alpha_loss': 4.87670920536532, 'alpha': 0.4526995525025485, 'critic_loss': 512.8687317608393, 'actor_loss': 93.47472825524403, 'time_step': 0.018341468091596636, 'td_error': 60.36205012458738, 'init_value': -142.31961059570312, 'ave_value': -84.29438518207486} step=12654
2022-04-20 19:28.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.50 [info     ] CQL_20220420192447: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003550261781926741, 'time_algorithm_update': 0.017898231221918474, 'temp_loss': 0.6281197884866194, 'temp': 0.8708798608236146, 'alpha_loss': 4.710325732565763, 'alpha': 0.43892619318780846, 'critic_loss': 518.7447945221126, 'actor_loss': 94.01563718026145, 'time_step': 0.0183522994058174, 'td_error': 60.21008667782249, 'init_value': -141.3683624267578, 'ave_value': -82.9643219277407} step=12996
2022-04-20 19:28.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:28.57 [info     ] CQL_20220420192447: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00035539914292898793, 'time_algorithm_update': 0.0178584411130314, 'temp_loss': 0.6149656656715605, 'temp': 0.8674367171281959, 'alpha_loss': 4.568897218034978, 'alpha': 0.4256948580344518, 'critic_loss': 524.1296660663091, 'actor_loss': 94.50971601162738, 'time_step': 0.018313776679903443, 'td_error': 61.64736458158397, 'init_value': -143.8864288330078, 'ave_value': -84.92483770936191} step=13338
2022-04-20 19:28.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.03 [info     ] CQL_20220420192447: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003511996297111288, 'time_algorithm_update': 0.017892373932732478, 'temp_loss': 0.5673216275151884, 'temp': 0.8642044524003191, 'alpha_loss': 4.466532151601468, 'alpha': 0.4127181977556463, 'critic_loss': 526.224181058114, 'actor_loss': 94.76560563650744, 'time_step': 0.018339141767624526, 'td_error': 57.44056221629276, 'init_value': -142.82080078125, 'ave_value': -84.58732247333134} step=13680
2022-04-20 19:29.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.10 [info     ] CQL_20220420192447: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035643856427822895, 'time_algorithm_update': 0.01785615382835879, 'temp_loss': 0.5957161902357437, 'temp': 0.8608925368702203, 'alpha_loss': 4.291067088556568, 'alpha': 0.4002304384757204, 'critic_loss': 531.9827012625354, 'actor_loss': 95.2068231370714, 'time_step': 0.018312127966629833, 'td_error': 58.491942382288116, 'init_value': -140.101318359375, 'ave_value': -84.03574262422585} step=14022
2022-04-20 19:29.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.16 [info     ] CQL_20220420192447: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00035330775188423735, 'time_algorithm_update': 0.017963238626892804, 'temp_loss': 0.5613011630219326, 'temp': 0.8575634888389654, 'alpha_loss': 4.16689105619464, 'alpha': 0.3881825646113234, 'critic_loss': 537.6283336438631, 'actor_loss': 95.59419553879408, 'time_step': 0.0184152272709629, 'td_error': 57.90048624070653, 'init_value': -142.96136474609375, 'ave_value': -85.34327347086122} step=14364
2022-04-20 19:29.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.23 [info     ] CQL_20220420192447: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003578467675816943, 'time_algorithm_update': 0.017937932098120973, 'temp_loss': 0.5510875954570478, 'temp': 0.8543353492056417, 'alpha_loss': 4.040928627315321, 'alpha': 0.37644024716134655, 'critic_loss': 540.618820101197, 'actor_loss': 95.81773838243987, 'time_step': 0.018393384085761175, 'td_error': 55.40961265307358, 'init_value': -140.5087890625, 'ave_value': -84.46536226583628} step=14706
2022-04-20 19:29.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.29 [info     ] CQL_20220420192447: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003479238142047012, 'time_algorithm_update': 0.017760531247010707, 'temp_loss': 0.5293006687071066, 'temp': 0.8511797834558097, 'alpha_loss': 3.9070757808741075, 'alpha': 0.36508934243380675, 'critic_loss': 543.2822037189327, 'actor_loss': 96.00870658919128, 'time_step': 0.01820837963394254, 'td_error': 55.68072156002683, 'init_value': -142.9307403564453, 'ave_value': -85.2495436497536} step=15048
2022-04-20 19:29.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.36 [info     ] CQL_20220420192447: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00034949096322756763, 'time_algorithm_update': 0.01782916163840489, 'temp_loss': 0.5274934588185354, 'temp': 0.8479711388983922, 'alpha_loss': 3.7694133422528093, 'alpha': 0.35409659452257103, 'critic_loss': 546.9552031399911, 'actor_loss': 96.27088580215187, 'time_step': 0.018275948295816344, 'td_error': 56.6012970729344, 'init_value': -140.8169708251953, 'ave_value': -85.29004039318644} step=15390
2022-04-20 19:29.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.42 [info     ] CQL_20220420192447: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003549711048951623, 'time_algorithm_update': 0.017825977146974085, 'temp_loss': 0.5264509228651809, 'temp': 0.8447389536433749, 'alpha_loss': 3.6720048384359707, 'alpha': 0.3434295064350318, 'critic_loss': 550.420002809045, 'actor_loss': 96.46197797541032, 'time_step': 0.018280469186124745, 'td_error': 56.34897514094196, 'init_value': -142.71652221679688, 'ave_value': -86.26382857773889} step=15732
2022-04-20 19:29.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.49 [info     ] CQL_20220420192447: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035791578348617106, 'time_algorithm_update': 0.01789745949862296, 'temp_loss': 0.5349734715725246, 'temp': 0.8414589766173335, 'alpha_loss': 3.553611515906819, 'alpha': 0.3330835218143742, 'critic_loss': 554.2472854525025, 'actor_loss': 96.6806720265171, 'time_step': 0.01835492131305717, 'td_error': 55.85247423287871, 'init_value': -140.7459716796875, 'ave_value': -86.20080922041693} step=16074
2022-04-20 19:29.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:29.55 [info     ] CQL_20220420192447: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003548776894284968, 'time_algorithm_update': 0.01793104026749817, 'temp_loss': 0.5139973185172206, 'temp': 0.8381551777758793, 'alpha_loss': 3.424918999100289, 'alpha': 0.3230545071133396, 'critic_loss': 556.4312314039086, 'actor_loss': 96.77431164568628, 'time_step': 0.01838429141462895, 'td_error': 57.766322077015424, 'init_value': -143.4764862060547, 'ave_value': -87.69093785918275} step=16416
2022-04-20 19:29.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:30.02 [info     ] CQL_20220420192447: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00035720261913991115, 'time_algorithm_update': 0.018024515687373645, 'temp_loss': 0.5121589322623453, 'temp': 0.8350031644280194, 'alpha_loss': 3.313227299012636, 'alpha': 0.31336484788454066, 'critic_loss': 560.0827545701412, 'actor_loss': 97.00269670096057, 'time_step': 0.01848330274659988, 'td_error': 55.26593587656932, 'init_value': -141.26522827148438, 'ave_value': -86.13073095250573} step=16758
2022-04-20 19:30.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:30.09 [info     ] CQL_20220420192447: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.000354582803291187, 'time_algorithm_update': 0.017938924114606535, 'temp_loss': 0.5181278285859098, 'temp': 0.8316920723831445, 'alpha_loss': 3.213286338493838, 'alpha': 0.30397682833044154, 'critic_loss': 564.0112281487002, 'actor_loss': 97.15589211001034, 'time_step': 0.01839372567963182, 'td_error': 55.01731468557226, 'init_value': -140.47280883789062, 'ave_value': -85.95487704412525} step=17100
2022-04-20 19:30.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420192447/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:30.09 [info     ] FQE_20220420193009: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014604574226471316, 'time_algorithm_update': 0.0020504917006894767, 'loss': 0.008732990762892079, 'time_step': 0.002265298222920981, 'init_value': -0.23524031043052673, 'ave_value': -0.18130130721172233, 'soft_opc': nan} step=166




2022-04-20 19:30.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.10 [info     ] FQE_20220420193009: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014677105179752213, 'time_algorithm_update': 0.0021045466503465033, 'loss': 0.0071786561944376095, 'time_step': 0.0023214400532733962, 'init_value': -0.45548486709594727, 'ave_value': -0.33143812986762, 'soft_opc': nan} step=332




2022-04-20 19:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.10 [info     ] FQE_20220420193009: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014888665762292333, 'time_algorithm_update': 0.002121623740138778, 'loss': 0.006270261833455458, 'time_step': 0.0023431878492056607, 'init_value': -0.5357247591018677, 'ave_value': -0.3590783558720891, 'soft_opc': nan} step=498




2022-04-20 19:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.10 [info     ] FQE_20220420193009: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014883782490190253, 'time_algorithm_update': 0.002044266965015825, 'loss': 0.0063993076678836745, 'time_step': 0.002259500055428011, 'init_value': -0.629662036895752, 'ave_value': -0.3989310704664587, 'soft_opc': nan} step=664




2022-04-20 19:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.11 [info     ] FQE_20220420193009: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014954302684370293, 'time_algorithm_update': 0.002055784305894231, 'loss': 0.006133416548360094, 'time_step': 0.002273711813501565, 'init_value': -0.728840708732605, 'ave_value': -0.4541634093512789, 'soft_opc': nan} step=830




2022-04-20 19:30.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.11 [info     ] FQE_20220420193009: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001461606427847621, 'time_algorithm_update': 0.0020569706537637367, 'loss': 0.005836501532425004, 'time_step': 0.0022712601236550205, 'init_value': -0.7758380174636841, 'ave_value': -0.47759801563429266, 'soft_opc': nan} step=996




2022-04-20 19:30.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.12 [info     ] FQE_20220420193009: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014401343931634742, 'time_algorithm_update': 0.001991264791373747, 'loss': 0.005748549417356949, 'time_step': 0.0022014494401862822, 'init_value': -0.8512012362480164, 'ave_value': -0.5048433713916991, 'soft_opc': nan} step=1162




2022-04-20 19:30.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.12 [info     ] FQE_20220420193009: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014561199280152838, 'time_algorithm_update': 0.0020873446062386752, 'loss': 0.005556305350051318, 'time_step': 0.0023065992148525744, 'init_value': -0.8957592248916626, 'ave_value': -0.533471841555139, 'soft_opc': nan} step=1328




2022-04-20 19:30.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.13 [info     ] FQE_20220420193009: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014585472015013178, 'time_algorithm_update': 0.0020483990749680854, 'loss': 0.005420748743295939, 'time_step': 0.002261157495429717, 'init_value': -0.9153374433517456, 'ave_value': -0.5295299102501894, 'soft_opc': nan} step=1494




2022-04-20 19:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.13 [info     ] FQE_20220420193009: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014704106801963714, 'time_algorithm_update': 0.002033897193081408, 'loss': 0.005643189187632896, 'time_step': 0.0022523273904639556, 'init_value': -0.9497165083885193, 'ave_value': -0.5396498350812509, 'soft_opc': nan} step=1660




2022-04-20 19:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.13 [info     ] FQE_20220420193009: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00014966510864625494, 'time_algorithm_update': 0.0020445470350334443, 'loss': 0.005390220487220998, 'time_step': 0.002263599131480757, 'init_value': -1.0190492868423462, 'ave_value': -0.5989579599786986, 'soft_opc': nan} step=1826




2022-04-20 19:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.14 [info     ] FQE_20220420193009: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014870425304734563, 'time_algorithm_update': 0.0020480285207909272, 'loss': 0.005173737469342458, 'time_step': 0.0022668091647596246, 'init_value': -1.0096793174743652, 'ave_value': -0.5744764333496475, 'soft_opc': nan} step=1992




2022-04-20 19:30.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.14 [info     ] FQE_20220420193009: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001455645963370082, 'time_algorithm_update': 0.002081523458641696, 'loss': 0.0054641114646030295, 'time_step': 0.002296508076679276, 'init_value': -1.0596611499786377, 'ave_value': -0.5838215873708847, 'soft_opc': nan} step=2158




2022-04-20 19:30.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.15 [info     ] FQE_20220420193009: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014603425221270826, 'time_algorithm_update': 0.002057525048772973, 'loss': 0.005191628361844559, 'time_step': 0.002273029591663774, 'init_value': -1.1231472492218018, 'ave_value': -0.6222249337824414, 'soft_opc': nan} step=2324




2022-04-20 19:30.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.15 [info     ] FQE_20220420193009: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001494769590446748, 'time_algorithm_update': 0.0020685296460806607, 'loss': 0.005082796047250909, 'time_step': 0.002286712807345103, 'init_value': -1.1715259552001953, 'ave_value': -0.6470254104941043, 'soft_opc': nan} step=2490




2022-04-20 19:30.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.15 [info     ] FQE_20220420193009: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001430726913084467, 'time_algorithm_update': 0.002045347029904285, 'loss': 0.00555998429302575, 'time_step': 0.0022563460361526675, 'init_value': -1.2102775573730469, 'ave_value': -0.648751223349088, 'soft_opc': nan} step=2656




2022-04-20 19:30.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.16 [info     ] FQE_20220420193009: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015035882053605044, 'time_algorithm_update': 0.0021172331040164075, 'loss': 0.005586847972899047, 'time_step': 0.002341162727539798, 'init_value': -1.2823692560195923, 'ave_value': -0.6865276478587541, 'soft_opc': nan} step=2822




2022-04-20 19:30.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.16 [info     ] FQE_20220420193009: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014841843800372388, 'time_algorithm_update': 0.0021048267203641227, 'loss': 0.005709794890668513, 'time_step': 0.0023224899567753435, 'init_value': -1.3457720279693604, 'ave_value': -0.7313007349081271, 'soft_opc': nan} step=2988




2022-04-20 19:30.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.17 [info     ] FQE_20220420193009: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014890245644443007, 'time_algorithm_update': 0.002078291881515319, 'loss': 0.0063015451768401965, 'time_step': 0.002296539674322289, 'init_value': -1.4029719829559326, 'ave_value': -0.7454270336594483, 'soft_opc': nan} step=3154




2022-04-20 19:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.17 [info     ] FQE_20220420193009: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014734268188476562, 'time_algorithm_update': 0.0020558647362582654, 'loss': 0.006309790665098657, 'time_step': 0.00227358255041651, 'init_value': -1.4626014232635498, 'ave_value': -0.7521242764266627, 'soft_opc': nan} step=3320




2022-04-20 19:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.18 [info     ] FQE_20220420193009: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014298651591840997, 'time_algorithm_update': 0.002016916332474674, 'loss': 0.006645203894845782, 'time_step': 0.00222881587154894, 'init_value': -1.4794602394104004, 'ave_value': -0.7632069915730237, 'soft_opc': nan} step=3486




2022-04-20 19:30.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.18 [info     ] FQE_20220420193009: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014475885644016495, 'time_algorithm_update': 0.001999167074640113, 'loss': 0.006942711478794913, 'time_step': 0.0022164698106696807, 'init_value': -1.539233684539795, 'ave_value': -0.7842271911089418, 'soft_opc': nan} step=3652




2022-04-20 19:30.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.18 [info     ] FQE_20220420193009: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014491109962922982, 'time_algorithm_update': 0.0020092093800923912, 'loss': 0.007388977109488234, 'time_step': 0.0022200302905346974, 'init_value': -1.6413702964782715, 'ave_value': -0.8520741395079656, 'soft_opc': nan} step=3818




2022-04-20 19:30.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.19 [info     ] FQE_20220420193009: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001476802021624094, 'time_algorithm_update': 0.002055370664022055, 'loss': 0.00778615368244025, 'time_step': 0.0022759078496910005, 'init_value': -1.6888153553009033, 'ave_value': -0.8751838121175145, 'soft_opc': nan} step=3984




2022-04-20 19:30.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.19 [info     ] FQE_20220420193009: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014874446822936275, 'time_algorithm_update': 0.0020005932773452207, 'loss': 0.008725928488406297, 'time_step': 0.002220604793134942, 'init_value': -1.759289026260376, 'ave_value': -0.8945403581489347, 'soft_opc': nan} step=4150




2022-04-20 19:30.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.20 [info     ] FQE_20220420193009: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014474162136215762, 'time_algorithm_update': 0.0020332307700651237, 'loss': 0.009284868183495274, 'time_step': 0.002249464931258236, 'init_value': -1.8828256130218506, 'ave_value': -0.9759417033357848, 'soft_opc': nan} step=4316




2022-04-20 19:30.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.20 [info     ] FQE_20220420193009: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014676961554102152, 'time_algorithm_update': 0.0020705720028245307, 'loss': 0.009808412478137088, 'time_step': 0.0022841261093875013, 'init_value': -1.8805384635925293, 'ave_value': -0.9542667494356833, 'soft_opc': nan} step=4482




2022-04-20 19:30.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.21 [info     ] FQE_20220420193009: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001500370990799134, 'time_algorithm_update': 0.002045714711568442, 'loss': 0.009754034037651292, 'time_step': 0.00226409894874297, 'init_value': -2.028254985809326, 'ave_value': -1.0472665151090337, 'soft_opc': nan} step=4648




2022-04-20 19:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.21 [info     ] FQE_20220420193009: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015026977263301252, 'time_algorithm_update': 0.002047436783112675, 'loss': 0.010809260245176953, 'time_step': 0.0022659086319337407, 'init_value': -2.10386323928833, 'ave_value': -1.0471140455001513, 'soft_opc': nan} step=4814




2022-04-20 19:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.21 [info     ] FQE_20220420193009: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001490532633769943, 'time_algorithm_update': 0.0020658251750900083, 'loss': 0.010909865037341747, 'time_step': 0.0022811200245317206, 'init_value': -2.1830196380615234, 'ave_value': -1.119500085533605, 'soft_opc': nan} step=4980




2022-04-20 19:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.22 [info     ] FQE_20220420193009: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014880479100238845, 'time_algorithm_update': 0.0020372307444193275, 'loss': 0.011351703830254096, 'time_step': 0.0022524997412440287, 'init_value': -2.337228536605835, 'ave_value': -1.2213711120295212, 'soft_opc': nan} step=5146




2022-04-20 19:30.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.22 [info     ] FQE_20220420193009: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014751359640833843, 'time_algorithm_update': 0.0020918027464165746, 'loss': 0.011846459913909077, 'time_step': 0.0023100534117365457, 'init_value': -2.4278178215026855, 'ave_value': -1.2386256074543354, 'soft_opc': nan} step=5312




2022-04-20 19:30.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.23 [info     ] FQE_20220420193009: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014794160084552076, 'time_algorithm_update': 0.0020212581358760237, 'loss': 0.012423173471140754, 'time_step': 0.0022356984126998716, 'init_value': -2.4684627056121826, 'ave_value': -1.267036552610178, 'soft_opc': nan} step=5478




2022-04-20 19:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.23 [info     ] FQE_20220420193009: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001460658498557217, 'time_algorithm_update': 0.002025400299623788, 'loss': 0.013388541461170527, 'time_step': 0.0022391885159963585, 'init_value': -2.529426336288452, 'ave_value': -1.2871464519379745, 'soft_opc': nan} step=5644




2022-04-20 19:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.23 [info     ] FQE_20220420193009: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015065468937517648, 'time_algorithm_update': 0.0020833001079329527, 'loss': 0.013847928919201067, 'time_step': 0.0023041518337755316, 'init_value': -2.7132604122161865, 'ave_value': -1.3993385994756544, 'soft_opc': nan} step=5810




2022-04-20 19:30.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.24 [info     ] FQE_20220420193009: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014552725366799227, 'time_algorithm_update': 0.002037904348718115, 'loss': 0.01456424888259877, 'time_step': 0.002251441220203078, 'init_value': -2.7457785606384277, 'ave_value': -1.4187475891330759, 'soft_opc': nan} step=5976




2022-04-20 19:30.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.24 [info     ] FQE_20220420193009: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014767158462340572, 'time_algorithm_update': 0.0020860634654401296, 'loss': 0.015827667839118247, 'time_step': 0.0023013410798038343, 'init_value': -2.9237122535705566, 'ave_value': -1.5838056249804124, 'soft_opc': nan} step=6142




2022-04-20 19:30.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.25 [info     ] FQE_20220420193009: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.000150012682719403, 'time_algorithm_update': 0.002100945955299469, 'loss': 0.01657206982413464, 'time_step': 0.002318899315523814, 'init_value': -3.0745229721069336, 'ave_value': -1.63655764135251, 'soft_opc': nan} step=6308




2022-04-20 19:30.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.25 [info     ] FQE_20220420193009: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015052973505962327, 'time_algorithm_update': 0.0021209372095314853, 'loss': 0.017177845204826892, 'time_step': 0.0023420632603656814, 'init_value': -3.150115489959717, 'ave_value': -1.6868934422553106, 'soft_opc': nan} step=6474




2022-04-20 19:30.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.26 [info     ] FQE_20220420193009: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014919545277055488, 'time_algorithm_update': 0.002095015652208443, 'loss': 0.018609729652988714, 'time_step': 0.0023123011531600035, 'init_value': -3.286869525909424, 'ave_value': -1.7520835802978458, 'soft_opc': nan} step=6640




2022-04-20 19:30.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.26 [info     ] FQE_20220420193009: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014488811952522002, 'time_algorithm_update': 0.0020605957651712806, 'loss': 0.019221126392248076, 'time_step': 0.002274191523172769, 'init_value': -3.328601360321045, 'ave_value': -1.7674593154345177, 'soft_opc': nan} step=6806




2022-04-20 19:30.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.26 [info     ] FQE_20220420193009: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014738720583628458, 'time_algorithm_update': 0.002034438661782138, 'loss': 0.0191980865405289, 'time_step': 0.002248880374862487, 'init_value': -3.390259265899658, 'ave_value': -1.788332963271721, 'soft_opc': nan} step=6972




2022-04-20 19:30.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.27 [info     ] FQE_20220420193009: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015124068202742612, 'time_algorithm_update': 0.0020636162125920675, 'loss': 0.02079990047804383, 'time_step': 0.0022883056158042817, 'init_value': -3.597846508026123, 'ave_value': -1.949374669760063, 'soft_opc': nan} step=7138




2022-04-20 19:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.27 [info     ] FQE_20220420193009: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014556028756750636, 'time_algorithm_update': 0.0019633181123848422, 'loss': 0.021396664975502765, 'time_step': 0.00217524637658912, 'init_value': -3.587996482849121, 'ave_value': -1.9514681384214914, 'soft_opc': nan} step=7304




2022-04-20 19:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.28 [info     ] FQE_20220420193009: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014909778732851328, 'time_algorithm_update': 0.0020154786397175617, 'loss': 0.022530696921454495, 'time_step': 0.0022300094007009484, 'init_value': -3.655174493789673, 'ave_value': -1.9962086402521644, 'soft_opc': nan} step=7470




2022-04-20 19:30.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.28 [info     ] FQE_20220420193009: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014618075037577065, 'time_algorithm_update': 0.0020389959036585794, 'loss': 0.02318820746121816, 'time_step': 0.0022518132106367364, 'init_value': -3.7418229579925537, 'ave_value': -1.9817150810525779, 'soft_opc': nan} step=7636




2022-04-20 19:30.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.29 [info     ] FQE_20220420193009: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014658002968294075, 'time_algorithm_update': 0.0021031879516969242, 'loss': 0.023349495213310492, 'time_step': 0.002317932714898902, 'init_value': -3.764448881149292, 'ave_value': -2.004145417115777, 'soft_opc': nan} step=7802




2022-04-20 19:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.29 [info     ] FQE_20220420193009: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014753657651234823, 'time_algorithm_update': 0.0020766545491046214, 'loss': 0.023905017218939645, 'time_step': 0.0022945145526564264, 'init_value': -3.7853751182556152, 'ave_value': -1.982563452345428, 'soft_opc': nan} step=7968




2022-04-20 19:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.29 [info     ] FQE_20220420193009: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014697069145110716, 'time_algorithm_update': 0.0020713791789778746, 'loss': 0.024633130619683898, 'time_step': 0.0022902000381285884, 'init_value': -3.8961784839630127, 'ave_value': -2.0397300707889503, 'soft_opc': nan} step=8134




2022-04-20 19:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:30.30 [info     ] FQE_20220420193009: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014805937387857092, 'time_algorithm_update': 0.002065379935574819, 'loss': 0.025457084904065782, 'time_step': 0.002288714948906956, 'init_value': -3.9667208194732666, 'ave_value': -2.1135451413103725, 'soft_opc': nan} step=8300




2022-04-20 19:30.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193009/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:30.31 [debug    ] RoundIterator is selected.
2022-04-20 19:30.31 [info     ] Directory is created at d3rlpy_logs/FQE_20220420193031
2022-04-20 19:30.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:30.31 [debug    ] Building models...
2022-04-20 19:30.31 [debug    ] Models have been built.
2022-04-20 19:30.31 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420193031/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:30.31 [info     ] FQE_20220420193031: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015500880951105163, 'time_algorithm_update': 0.0020516230616458627, 'loss': 0.023694899785982142, 'time_step': 0.0022746712662452873, 'init_value': -1.223892092704773, 'ave_value': -1.2441102501105619, 'soft_opc': nan} step=344




2022-04-20 19:30.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.32 [info     ] FQE_20220420193031: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015416256217069404, 'time_algorithm_update': 0.002054132001344548, 'loss': 0.022377996488885825, 'time_step': 0.0022759659345759904, 'init_value': -2.0748071670532227, 'ave_value': -2.1123929906401546, 'soft_opc': nan} step=688




2022-04-20 19:30.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.33 [info     ] FQE_20220420193031: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015809716180313464, 'time_algorithm_update': 0.0020591235438058545, 'loss': 0.02547517111308353, 'time_step': 0.002288543900778127, 'init_value': -3.08115291595459, 'ave_value': -3.12839567203511, 'soft_opc': nan} step=1032




2022-04-20 19:30.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.34 [info     ] FQE_20220420193031: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015282076458598292, 'time_algorithm_update': 0.0020561932131301524, 'loss': 0.02774526963692678, 'time_step': 0.002276920301969661, 'init_value': -3.9489378929138184, 'ave_value': -3.965368656666429, 'soft_opc': nan} step=1376




2022-04-20 19:30.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.35 [info     ] FQE_20220420193031: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015675744345021802, 'time_algorithm_update': 0.0020766133485838425, 'loss': 0.03254727547349365, 'time_step': 0.0023011641446934188, 'init_value': -5.068192481994629, 'ave_value': -5.073856655837179, 'soft_opc': nan} step=1720




2022-04-20 19:30.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.36 [info     ] FQE_20220420193031: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015539623970209167, 'time_algorithm_update': 0.002079470213069472, 'loss': 0.037805633719599004, 'time_step': 0.002301216818565546, 'init_value': -5.701170921325684, 'ave_value': -5.696667751347697, 'soft_opc': nan} step=2064




2022-04-20 19:30.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.37 [info     ] FQE_20220420193031: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015586614608764648, 'time_algorithm_update': 0.0020458656688069187, 'loss': 0.04598279461280854, 'time_step': 0.0022719058879586153, 'init_value': -6.783422470092773, 'ave_value': -6.781095385417208, 'soft_opc': nan} step=2408




2022-04-20 19:30.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.37 [info     ] FQE_20220420193031: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015473989553229753, 'time_algorithm_update': 0.002021362615186115, 'loss': 0.055985132249906054, 'time_step': 0.0022433649661929107, 'init_value': -7.400246620178223, 'ave_value': -7.371453941888637, 'soft_opc': nan} step=2752




2022-04-20 19:30.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.38 [info     ] FQE_20220420193031: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015461999316548192, 'time_algorithm_update': 0.00204692053240399, 'loss': 0.06677650427716503, 'time_step': 0.0022742415583410927, 'init_value': -8.143684387207031, 'ave_value': -8.085672866761147, 'soft_opc': nan} step=3096




2022-04-20 19:30.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.39 [info     ] FQE_20220420193031: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015719477520432582, 'time_algorithm_update': 0.002041968495346779, 'loss': 0.08038934642384045, 'time_step': 0.002267619898152906, 'init_value': -9.146875381469727, 'ave_value': -9.116818497632002, 'soft_opc': nan} step=3440




2022-04-20 19:30.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.40 [info     ] FQE_20220420193031: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015863083129705384, 'time_algorithm_update': 0.002116397369739621, 'loss': 0.09472596623155094, 'time_step': 0.0023473341797673425, 'init_value': -9.714534759521484, 'ave_value': -9.737636159132192, 'soft_opc': nan} step=3784




2022-04-20 19:30.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.41 [info     ] FQE_20220420193031: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001552569311718608, 'time_algorithm_update': 0.0020874170369880145, 'loss': 0.106696319541634, 'time_step': 0.002313901518666467, 'init_value': -10.241447448730469, 'ave_value': -10.472473002124477, 'soft_opc': nan} step=4128




2022-04-20 19:30.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.42 [info     ] FQE_20220420193031: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015518693036811296, 'time_algorithm_update': 0.0020284611125325046, 'loss': 0.11835324740427178, 'time_step': 0.0022511710954266923, 'init_value': -10.842309951782227, 'ave_value': -11.208418886215837, 'soft_opc': nan} step=4472




2022-04-20 19:30.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.43 [info     ] FQE_20220420193031: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015348611876021985, 'time_algorithm_update': 0.0020771816719410033, 'loss': 0.13119172307576032, 'time_step': 0.0023005244343779806, 'init_value': -11.195609092712402, 'ave_value': -11.904279116333068, 'soft_opc': nan} step=4816




2022-04-20 19:30.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.44 [info     ] FQE_20220420193031: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015509197878283123, 'time_algorithm_update': 0.002046845680059389, 'loss': 0.14225451123824906, 'time_step': 0.002268055843752484, 'init_value': -11.653175354003906, 'ave_value': -12.589635718459482, 'soft_opc': nan} step=5160




2022-04-20 19:30.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.44 [info     ] FQE_20220420193031: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.000155500894369081, 'time_algorithm_update': 0.00205655014792154, 'loss': 0.15361176614736227, 'time_step': 0.0022814758988313898, 'init_value': -12.212610244750977, 'ave_value': -13.431390129895629, 'soft_opc': nan} step=5504




2022-04-20 19:30.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.45 [info     ] FQE_20220420193031: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001579100309416305, 'time_algorithm_update': 0.0020583036334015604, 'loss': 0.17121627057772562, 'time_step': 0.002283621666043304, 'init_value': -12.29787826538086, 'ave_value': -13.913328902842897, 'soft_opc': nan} step=5848




2022-04-20 19:30.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.46 [info     ] FQE_20220420193031: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015633328016414198, 'time_algorithm_update': 0.002061346935671429, 'loss': 0.1803678262946304, 'time_step': 0.0022853203984194025, 'init_value': -12.39927864074707, 'ave_value': -14.416776782274665, 'soft_opc': nan} step=6192




2022-04-20 19:30.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.47 [info     ] FQE_20220420193031: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.000156365854795589, 'time_algorithm_update': 0.002090948958729589, 'loss': 0.1938837802600722, 'time_step': 0.0023163827352745588, 'init_value': -12.729259490966797, 'ave_value': -15.348666752200272, 'soft_opc': nan} step=6536




2022-04-20 19:30.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.48 [info     ] FQE_20220420193031: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001563679340273835, 'time_algorithm_update': 0.0020958323811375817, 'loss': 0.20116301047680682, 'time_step': 0.002324222825294317, 'init_value': -12.851314544677734, 'ave_value': -15.861755839080827, 'soft_opc': nan} step=6880




2022-04-20 19:30.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.49 [info     ] FQE_20220420193031: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015785735706950343, 'time_algorithm_update': 0.002092588779538177, 'loss': 0.2082077556798711, 'time_step': 0.002319858517757682, 'init_value': -12.816875457763672, 'ave_value': -16.22245129040518, 'soft_opc': nan} step=7224




2022-04-20 19:30.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.50 [info     ] FQE_20220420193031: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015418196833410927, 'time_algorithm_update': 0.002046149830485499, 'loss': 0.21786191473650032, 'time_step': 0.0022696582383887713, 'init_value': -13.031482696533203, 'ave_value': -16.859680042783353, 'soft_opc': nan} step=7568




2022-04-20 19:30.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.50 [info     ] FQE_20220420193031: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015295938003894894, 'time_algorithm_update': 0.00201877189237018, 'loss': 0.22839312065869224, 'time_step': 0.0022401996823244318, 'init_value': -13.259957313537598, 'ave_value': -17.55307318815072, 'soft_opc': nan} step=7912




2022-04-20 19:30.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.51 [info     ] FQE_20220420193031: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015759329463160314, 'time_algorithm_update': 0.0020678542381109195, 'loss': 0.23850513481383406, 'time_step': 0.0022958662620810575, 'init_value': -13.252939224243164, 'ave_value': -17.888976964266416, 'soft_opc': nan} step=8256




2022-04-20 19:30.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.52 [info     ] FQE_20220420193031: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015669645265091296, 'time_algorithm_update': 0.002064028451609057, 'loss': 0.24458075075973432, 'time_step': 0.002290594023327495, 'init_value': -13.196374893188477, 'ave_value': -18.164680677882252, 'soft_opc': nan} step=8600




2022-04-20 19:30.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.53 [info     ] FQE_20220420193031: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015687595966250398, 'time_algorithm_update': 0.002018060795096464, 'loss': 0.2552698069132951, 'time_step': 0.002245008945465088, 'init_value': -13.434938430786133, 'ave_value': -18.780693550616803, 'soft_opc': nan} step=8944




2022-04-20 19:30.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.54 [info     ] FQE_20220420193031: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015789478324180426, 'time_algorithm_update': 0.002066514519758003, 'loss': 0.2635259663599522, 'time_step': 0.002296650825544845, 'init_value': -13.814111709594727, 'ave_value': -19.37097177916282, 'soft_opc': nan} step=9288




2022-04-20 19:30.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.55 [info     ] FQE_20220420193031: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015737081682959267, 'time_algorithm_update': 0.0020979393360226655, 'loss': 0.277300966665322, 'time_step': 0.0023240412390509316, 'init_value': -13.86005687713623, 'ave_value': -19.77462873840285, 'soft_opc': nan} step=9632




2022-04-20 19:30.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.56 [info     ] FQE_20220420193031: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015631664630978606, 'time_algorithm_update': 0.002043913963229157, 'loss': 0.28776554619828454, 'time_step': 0.002269075360409049, 'init_value': -14.232900619506836, 'ave_value': -20.29154144918907, 'soft_opc': nan} step=9976




2022-04-20 19:30.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.56 [info     ] FQE_20220420193031: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015403364979943565, 'time_algorithm_update': 0.0020363247671792675, 'loss': 0.30381868708727144, 'time_step': 0.0022583728612855423, 'init_value': -14.629581451416016, 'ave_value': -20.753955796658826, 'soft_opc': nan} step=10320




2022-04-20 19:30.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.57 [info     ] FQE_20220420193031: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015607545542162517, 'time_algorithm_update': 0.00205336892327597, 'loss': 0.3112707582226586, 'time_step': 0.00228212739146033, 'init_value': -14.906970977783203, 'ave_value': -21.068047986112468, 'soft_opc': nan} step=10664




2022-04-20 19:30.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.58 [info     ] FQE_20220420193031: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015712200209151868, 'time_algorithm_update': 0.0020790703074876652, 'loss': 0.32393811157428076, 'time_step': 0.002305292805960012, 'init_value': -14.912420272827148, 'ave_value': -20.985701992519637, 'soft_opc': nan} step=11008




2022-04-20 19:30.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:30.59 [info     ] FQE_20220420193031: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015603179155394088, 'time_algorithm_update': 0.002054706562397092, 'loss': 0.3360201880379125, 'time_step': 0.0022805048975833627, 'init_value': -15.545637130737305, 'ave_value': -21.599203739305622, 'soft_opc': nan} step=11352




2022-04-20 19:30.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.00 [info     ] FQE_20220420193031: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015707972437836403, 'time_algorithm_update': 0.0021119651406310324, 'loss': 0.3402578639689573, 'time_step': 0.0023382209068120914, 'init_value': -15.40788745880127, 'ave_value': -21.490713442638853, 'soft_opc': nan} step=11696




2022-04-20 19:31.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.01 [info     ] FQE_20220420193031: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015900647917459177, 'time_algorithm_update': 0.0020736435125040453, 'loss': 0.35492816718490144, 'time_step': 0.002303722985955172, 'init_value': -15.91166877746582, 'ave_value': -21.704189298066588, 'soft_opc': nan} step=12040




2022-04-20 19:31.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.02 [info     ] FQE_20220420193031: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015645456868548725, 'time_algorithm_update': 0.0020555902359097504, 'loss': 0.36127884191092713, 'time_step': 0.0022822583830633828, 'init_value': -16.314956665039062, 'ave_value': -21.96269812569282, 'soft_opc': nan} step=12384




2022-04-20 19:31.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.02 [info     ] FQE_20220420193031: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015454652697540993, 'time_algorithm_update': 0.0020404437253641527, 'loss': 0.38352725981886304, 'time_step': 0.002262159835460574, 'init_value': -16.747188568115234, 'ave_value': -22.119074808457146, 'soft_opc': nan} step=12728




2022-04-20 19:31.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.03 [info     ] FQE_20220420193031: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001567338788232138, 'time_algorithm_update': 0.0020628231902455173, 'loss': 0.3926247431420136, 'time_step': 0.0022903583770574527, 'init_value': -17.232887268066406, 'ave_value': -22.453281342137505, 'soft_opc': nan} step=13072




2022-04-20 19:31.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.04 [info     ] FQE_20220420193031: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015502267105634823, 'time_algorithm_update': 0.002057354117548743, 'loss': 0.4107157508896794, 'time_step': 0.0022812090640844302, 'init_value': -17.53274917602539, 'ave_value': -22.612218309756837, 'soft_opc': nan} step=13416




2022-04-20 19:31.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.05 [info     ] FQE_20220420193031: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015768339467603108, 'time_algorithm_update': 0.002044525950454002, 'loss': 0.4191605863486265, 'time_step': 0.002269879330036252, 'init_value': -17.888687133789062, 'ave_value': -22.845216085639706, 'soft_opc': nan} step=13760




2022-04-20 19:31.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.06 [info     ] FQE_20220420193031: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015888657680777617, 'time_algorithm_update': 0.002079509025396303, 'loss': 0.4284933695493829, 'time_step': 0.0023076568925103477, 'init_value': -18.35379981994629, 'ave_value': -23.11233233688483, 'soft_opc': nan} step=14104




2022-04-20 19:31.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.07 [info     ] FQE_20220420193031: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015571297601211901, 'time_algorithm_update': 0.002067183339318564, 'loss': 0.4394129880130031, 'time_step': 0.0022929657337277436, 'init_value': -18.411888122558594, 'ave_value': -22.95122502521855, 'soft_opc': nan} step=14448




2022-04-20 19:31.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.08 [info     ] FQE_20220420193031: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015562842058580975, 'time_algorithm_update': 0.0020597674125848813, 'loss': 0.45439949860731355, 'time_step': 0.002283564140630323, 'init_value': -18.913118362426758, 'ave_value': -23.528668785260084, 'soft_opc': nan} step=14792




2022-04-20 19:31.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.08 [info     ] FQE_20220420193031: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001570422982060632, 'time_algorithm_update': 0.002086280390273693, 'loss': 0.4669882594824357, 'time_step': 0.002312115458555, 'init_value': -19.31216812133789, 'ave_value': -23.760267990609414, 'soft_opc': nan} step=15136




2022-04-20 19:31.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.09 [info     ] FQE_20220420193031: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015620575394741324, 'time_algorithm_update': 0.0021093993685966316, 'loss': 0.49598215463527934, 'time_step': 0.0023369671300400136, 'init_value': -20.192359924316406, 'ave_value': -24.280182921781567, 'soft_opc': nan} step=15480




2022-04-20 19:31.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.10 [info     ] FQE_20220420193031: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015905152919680574, 'time_algorithm_update': 0.0021246158799459766, 'loss': 0.5001218096937897, 'time_step': 0.002356213885684346, 'init_value': -20.318517684936523, 'ave_value': -24.40464416772488, 'soft_opc': nan} step=15824




2022-04-20 19:31.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.11 [info     ] FQE_20220420193031: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001591173715369646, 'time_algorithm_update': 0.002159436774808307, 'loss': 0.5090214686663171, 'time_step': 0.002387780782788299, 'init_value': -20.23016357421875, 'ave_value': -24.115961694448917, 'soft_opc': nan} step=16168




2022-04-20 19:31.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.12 [info     ] FQE_20220420193031: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015835914500924043, 'time_algorithm_update': 0.002075408780297568, 'loss': 0.5077812361346861, 'time_step': 0.00230338407117267, 'init_value': -20.599529266357422, 'ave_value': -24.350979905004973, 'soft_opc': nan} step=16512




2022-04-20 19:31.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.13 [info     ] FQE_20220420193031: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015770626622577046, 'time_algorithm_update': 0.0020717430946438813, 'loss': 0.5229324953478955, 'time_step': 0.0023011710754660673, 'init_value': -20.731895446777344, 'ave_value': -24.69325549734404, 'soft_opc': nan} step=16856




2022-04-20 19:31.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:31.14 [info     ] FQE_20220420193031: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015900509302006213, 'time_algorithm_update': 0.0020965483299521513, 'loss': 0.5257079724525643, 'time_step': 0.0023259673007698946, 'init_value': -20.90877914428711, 'ave_value': -24.924702101076644, 'soft_opc': nan} step=17200




2022-04-20 19:31.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193031/model_17200.pt
search iteration:  28
using hyper params:  [0.006546432455432492, 0.002476663492383323, 5.268203453933879e-05, 1]
2022-04-20 19:31.14 [debug    ] RoundIterator is selected.
2022-04-20 19:31.14 [info     ] Directory is created at d3rlpy_logs/CQL_20220420193114
2022-04-20 19:31.14 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:31.14 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:31.14 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420193114/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.006546432455432492, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.20 [info     ] CQL_20220420193114: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.000302333580820184, 'time_algorithm_update': 0.01765351407012047, 'temp_loss': 4.585157521286903, 'temp': 0.9921484261925457, 'alpha_loss': -14.495807957230953, 'alpha': 1.0164992582728292, 'critic_loss': 19.148116557918794, 'actor_loss': -1.3248988151201728, 'time_step': 0.01805223637854147, 'td_error': 3.6564005575923555, 'init_value': -0.7699969410896301, 'ave_value': 0.4864325231125763} step=342
2022-04-20 19:31.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.27 [info     ] CQL_20220420193114: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003024862523664508, 'time_algorithm_update': 0.018183220199674194, 'temp_loss': 4.055728545662952, 'temp': 0.9754605394357826, 'alpha_loss': -8.099957648773639, 'alpha': 1.0439079358563785, 'critic_loss': 20.989118492394162, 'actor_loss': -0.30748554731975175, 'time_step': 0.01858241934525339, 'td_error': 3.8250360174809184, 'init_value': -2.623215436935425, 'ave_value': 0.26237523347072234} step=684
2022-04-20 19:31.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.33 [info     ] CQL_20220420193114: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00030150190431472155, 'time_algorithm_update': 0.018570752171745076, 'temp_loss': 3.3429746816032813, 'temp': 0.9606656099272053, 'alpha_loss': -3.9033535717175023, 'alpha': 1.0625906924755253, 'critic_loss': 35.46287711182533, 'actor_loss': 1.0494047193325053, 'time_step': 0.018978555997212727, 'td_error': 4.476688597765969, 'init_value': -4.0425543785095215, 'ave_value': 0.5607814227715806} step=1026
2022-04-20 19:31.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.40 [info     ] CQL_20220420193114: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00030614618669476423, 'time_algorithm_update': 0.01842805168084931, 'temp_loss': 2.8338361180316634, 'temp': 0.947164110098666, 'alpha_loss': -0.8739974853202799, 'alpha': 1.0722186171520522, 'critic_loss': 54.77451823050516, 'actor_loss': 2.365889464161898, 'time_step': 0.018835012675725925, 'td_error': 5.717173404485862, 'init_value': -7.000853061676025, 'ave_value': -0.5644883224220426} step=1368
2022-04-20 19:31.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.47 [info     ] CQL_20220420193114: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003067680269654034, 'time_algorithm_update': 0.018421953881693164, 'temp_loss': 2.431919699523881, 'temp': 0.9345512226311087, 'alpha_loss': 1.527605643278064, 'alpha': 1.0701510770279064, 'critic_loss': 76.56301894383124, 'actor_loss': 3.6954407690212743, 'time_step': 0.0188306200574016, 'td_error': 6.834718462259337, 'init_value': -9.213912963867188, 'ave_value': -1.231927142498998} step=1710
2022-04-20 19:31.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:31.54 [info     ] CQL_20220420193114: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.000303384156254997, 'time_algorithm_update': 0.018372528037132574, 'temp_loss': 2.0712874977909332, 'temp': 0.922704509475775, 'alpha_loss': 3.6299171732379154, 'alpha': 1.0536346676056845, 'critic_loss': 99.64100805360671, 'actor_loss': 4.987912511267857, 'time_step': 0.01877216358630978, 'td_error': 8.819993530113836, 'init_value': -12.382621765136719, 'ave_value': -2.1646451081187874} step=2052
2022-04-20 19:31.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.00 [info     ] CQL_20220420193114: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003056923548380534, 'time_algorithm_update': 0.018558111107140257, 'temp_loss': 1.780805764491098, 'temp': 0.9116221131288518, 'alpha_loss': 5.481006006051225, 'alpha': 1.0223455118854143, 'critic_loss': 126.93678163227283, 'actor_loss': 6.684293627738953, 'time_step': 0.018965433215537265, 'td_error': 10.824994152427701, 'init_value': -15.572854995727539, 'ave_value': -3.4353147223108524} step=2394
2022-04-20 19:32.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.07 [info     ] CQL_20220420193114: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00030456370080423636, 'time_algorithm_update': 0.018529024040489867, 'temp_loss': 1.4835963501916294, 'temp': 0.9012650734151316, 'alpha_loss': 7.012906538115607, 'alpha': 0.9825034931040647, 'critic_loss': 157.34436838250411, 'actor_loss': 8.39882384754761, 'time_step': 0.018936221362554538, 'td_error': 13.17174756448932, 'init_value': -19.360919952392578, 'ave_value': -4.648008500428887} step=2736
2022-04-20 19:32.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.14 [info     ] CQL_20220420193114: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00031152385020116616, 'time_algorithm_update': 0.019122472980566192, 'temp_loss': 1.2510452932781644, 'temp': 0.8916708039261444, 'alpha_loss': 8.187244788945069, 'alpha': 0.9404889378282759, 'critic_loss': 192.86593369154903, 'actor_loss': 10.296898242325812, 'time_step': 0.01953703060484769, 'td_error': 18.96671163817027, 'init_value': -22.544254302978516, 'ave_value': -6.623864488113034} step=3078
2022-04-20 19:32.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.21 [info     ] CQL_20220420193114: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003098061210230777, 'time_algorithm_update': 0.01883519044396473, 'temp_loss': 1.0343585606032646, 'temp': 0.8827422060813123, 'alpha_loss': 9.242819766552127, 'alpha': 0.9000294747059805, 'critic_loss': 232.4823990760491, 'actor_loss': 12.43651697370741, 'time_step': 0.0192456747356214, 'td_error': 21.567545837844722, 'init_value': -27.97674560546875, 'ave_value': -8.693992092872525} step=3420
2022-04-20 19:32.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.28 [info     ] CQL_20220420193114: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00030492063154254046, 'time_algorithm_update': 0.018864764804728547, 'temp_loss': 0.836971963946408, 'temp': 0.8745219916628119, 'alpha_loss': 10.133948367938661, 'alpha': 0.8618371145069947, 'critic_loss': 276.9481748614395, 'actor_loss': 14.813079299982528, 'time_step': 0.01927328109741211, 'td_error': 24.94896576799081, 'init_value': -33.07909393310547, 'ave_value': -10.233270445084786} step=3762
2022-04-20 19:32.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.34 [info     ] CQL_20220420193114: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00030423047249777274, 'time_algorithm_update': 0.018635982658430847, 'temp_loss': 0.66360071721917, 'temp': 0.8671462640427706, 'alpha_loss': 10.93877477255481, 'alpha': 0.8260148258237113, 'critic_loss': 326.1797974346674, 'actor_loss': 17.426501539018417, 'time_step': 0.019035059109068754, 'td_error': 31.551961328536848, 'init_value': -38.65713882446289, 'ave_value': -13.557513548001513} step=4104
2022-04-20 19:32.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.41 [info     ] CQL_20220420193114: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00030231196977938824, 'time_algorithm_update': 0.018507533603244357, 'temp_loss': 0.4774273260175223, 'temp': 0.8607258320900432, 'alpha_loss': 11.962473220992507, 'alpha': 0.7922978024733695, 'critic_loss': 383.6748786613955, 'actor_loss': 20.453274685039855, 'time_step': 0.018904105264541, 'td_error': 40.62958718421256, 'init_value': -44.964805603027344, 'ave_value': -15.691028357072993} step=4446
2022-04-20 19:32.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.48 [info     ] CQL_20220420193114: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00029876148491575006, 'time_algorithm_update': 0.018307764627780134, 'temp_loss': 0.2787971093771402, 'temp': 0.8558840423996685, 'alpha_loss': 13.05725173504032, 'alpha': 0.7598896382147806, 'critic_loss': 450.1317631236294, 'actor_loss': 23.9056531783433, 'time_step': 0.018706241546318544, 'td_error': 54.070397724792905, 'init_value': -52.79603958129883, 'ave_value': -18.25194204511943} step=4788
2022-04-20 19:32.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:32.54 [info     ] CQL_20220420193114: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.000298435227912769, 'time_algorithm_update': 0.01829164348847685, 'temp_loss': 0.14487955029454758, 'temp': 0.852793829657181, 'alpha_loss': 14.005593626122726, 'alpha': 0.729172050255781, 'critic_loss': 529.4725758513512, 'actor_loss': 27.945123594406752, 'time_step': 0.018688133585522747, 'td_error': 66.91661200297916, 'init_value': -61.123313903808594, 'ave_value': -22.099583638262104} step=5130
2022-04-20 19:32.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.01 [info     ] CQL_20220420193114: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.000304105686165436, 'time_algorithm_update': 0.01816478737613611, 'temp_loss': -0.005151826844752183, 'temp': 0.8514368079210582, 'alpha_loss': 15.055658555170249, 'alpha': 0.699990989869101, 'critic_loss': 619.5926712660762, 'actor_loss': 32.46263237445675, 'time_step': 0.01856718216723169, 'td_error': 109.01914039939753, 'init_value': -71.52457427978516, 'ave_value': -25.658165076004494} step=5472
2022-04-20 19:33.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.08 [info     ] CQL_20220420193114: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003063588114509806, 'time_algorithm_update': 0.01848646841551128, 'temp_loss': -0.09263782914488287, 'temp': 0.8526779078600699, 'alpha_loss': 15.9170225321898, 'alpha': 0.6722659025973047, 'critic_loss': 722.1869794165182, 'actor_loss': 37.59943630263122, 'time_step': 0.018897999099820678, 'td_error': 129.66720907919506, 'init_value': -82.96537017822266, 'ave_value': -30.55511569893038} step=5814
2022-04-20 19:33.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.14 [info     ] CQL_20220420193114: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00030710613518430475, 'time_algorithm_update': 0.018354548348320857, 'temp_loss': -0.21064952260417025, 'temp': 0.8560627221364027, 'alpha_loss': 16.860170372745447, 'alpha': 0.6459471196459051, 'critic_loss': 838.9826688710709, 'actor_loss': 43.51341563219216, 'time_step': 0.018767673369736698, 'td_error': 169.77410333426215, 'init_value': -100.09846496582031, 'ave_value': -37.79613789113255} step=6156
2022-04-20 19:33.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.21 [info     ] CQL_20220420193114: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00030222203996446397, 'time_algorithm_update': 0.01825069474895098, 'temp_loss': -0.27651307071771536, 'temp': 0.8619696851710827, 'alpha_loss': 17.699719855659886, 'alpha': 0.6209947735245465, 'critic_loss': 966.8497830218042, 'actor_loss': 49.899956229137395, 'time_step': 0.01865368558649431, 'td_error': 245.1816775826957, 'init_value': -119.1318359375, 'ave_value': -45.127757619248854} step=6498
2022-04-20 19:33.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.27 [info     ] CQL_20220420193114: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003003397880241885, 'time_algorithm_update': 0.01811690706955759, 'temp_loss': -0.3969353408667079, 'temp': 0.8718458787042495, 'alpha_loss': 18.768009314063, 'alpha': 0.5970686220983316, 'critic_loss': 1108.754142716614, 'actor_loss': 57.04928587751779, 'time_step': 0.018513993213051243, 'td_error': 198.50506352719162, 'init_value': -137.53176879882812, 'ave_value': -50.226968708747144} step=6840
2022-04-20 19:33.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.34 [info     ] CQL_20220420193114: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003000874268381219, 'time_algorithm_update': 0.017875541023343627, 'temp_loss': -0.4366335048236781, 'temp': 0.8836534382307042, 'alpha_loss': 19.09749512086835, 'alpha': 0.5748054935918216, 'critic_loss': 1267.915656374212, 'actor_loss': 64.84286727682192, 'time_step': 0.018272816089161655, 'td_error': 372.6351518473057, 'init_value': -156.3043212890625, 'ave_value': -58.275380387005505} step=7182
2022-04-20 19:33.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.40 [info     ] CQL_20220420193114: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003001118264003107, 'time_algorithm_update': 0.017986186066566157, 'temp_loss': -0.48782951672893693, 'temp': 0.8975598076630754, 'alpha_loss': 19.868232936189887, 'alpha': 0.5531237909668371, 'critic_loss': 1440.907514605606, 'actor_loss': 73.33736003630342, 'time_step': 0.018383895444591142, 'td_error': 440.0411104775655, 'init_value': -180.06765747070312, 'ave_value': -65.81294016265117} step=7524
2022-04-20 19:33.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.48 [info     ] CQL_20220420193114: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032967782159994916, 'time_algorithm_update': 0.020448914745397735, 'temp_loss': -0.5084584054390067, 'temp': 0.9120856127892322, 'alpha_loss': 20.40389963897348, 'alpha': 0.5330712817914305, 'critic_loss': 1623.6118456745705, 'actor_loss': 82.42741257963125, 'time_step': 0.02088894690686499, 'td_error': 609.6393447833959, 'init_value': -206.94876098632812, 'ave_value': -75.79911878743687} step=7866
2022-04-20 19:33.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:33.55 [info     ] CQL_20220420193114: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003089723531265705, 'time_algorithm_update': 0.01880234933038901, 'temp_loss': -0.5930440661372148, 'temp': 0.9277153783722928, 'alpha_loss': 21.221296614373635, 'alpha': 0.5134833362715984, 'critic_loss': 1819.9333663851196, 'actor_loss': 92.04098936828257, 'time_step': 0.01920952155576115, 'td_error': 729.9351730834372, 'init_value': -229.126220703125, 'ave_value': -82.31473025306926} step=8208
2022-04-20 19:33.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.01 [info     ] CQL_20220420193114: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00029860881336948327, 'time_algorithm_update': 0.017876172623439143, 'temp_loss': -0.6098175060210956, 'temp': 0.9446122313452046, 'alpha_loss': 21.96871811325787, 'alpha': 0.4946491774934077, 'critic_loss': 2028.9927039787783, 'actor_loss': 102.66129717910499, 'time_step': 0.018276619632341708, 'td_error': 499.351705258284, 'init_value': -252.229736328125, 'ave_value': -90.08941901661254} step=8550
2022-04-20 19:34.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.08 [info     ] CQL_20220420193114: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00030130252503512197, 'time_algorithm_update': 0.01784281842192711, 'temp_loss': -0.7030471352784083, 'temp': 0.9620333129202413, 'alpha_loss': 23.97783900980364, 'alpha': 0.4765454155136967, 'critic_loss': 2259.351724903486, 'actor_loss': 114.51844323587696, 'time_step': 0.01824267426429436, 'td_error': 1081.2365714398072, 'init_value': -296.8724670410156, 'ave_value': -106.79518921718942} step=8892
2022-04-20 19:34.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.14 [info     ] CQL_20220420193114: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.000308135099578322, 'time_algorithm_update': 0.01758716747774715, 'temp_loss': -0.669241170959863, 'temp': 0.9797729659847348, 'alpha_loss': 22.86013322685197, 'alpha': 0.4588721182785536, 'critic_loss': 2517.6507289953397, 'actor_loss': 126.9706061067637, 'time_step': 0.017996023272910312, 'td_error': 828.5552885500199, 'init_value': -319.30859375, 'ave_value': -113.96381507373609} step=9234
2022-04-20 19:34.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.20 [info     ] CQL_20220420193114: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00029469860924614803, 'time_algorithm_update': 0.017076709117108617, 'temp_loss': -0.7389121713084087, 'temp': 0.9966500019818022, 'alpha_loss': 24.770279282017757, 'alpha': 0.44298107651939167, 'critic_loss': 2770.7774701034814, 'actor_loss': 139.73602573774014, 'time_step': 0.01746617702015659, 'td_error': 1816.6768365883736, 'init_value': -376.35430908203125, 'ave_value': -133.08207951884012} step=9576
2022-04-20 19:34.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.26 [info     ] CQL_20220420193114: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00028968275639048793, 'time_algorithm_update': 0.016738637148985387, 'temp_loss': -0.764004342693683, 'temp': 1.0147787548645197, 'alpha_loss': 25.63698147054304, 'alpha': 0.42657208765110777, 'critic_loss': 3070.203344155473, 'actor_loss': 155.1910753752056, 'time_step': 0.017119349791989688, 'td_error': 2144.1163799056385, 'init_value': -422.31805419921875, 'ave_value': -147.114004603227} step=9918
2022-04-20 19:34.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.33 [info     ] CQL_20220420193114: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00030390282123409515, 'time_algorithm_update': 0.01765444334487469, 'temp_loss': -0.9349795513682895, 'temp': 1.0348256718345552, 'alpha_loss': 15.950757497583913, 'alpha': 0.41395602344769483, 'critic_loss': 3239.9970574629933, 'actor_loss': 160.21757801792077, 'time_step': 0.018057851066366273, 'td_error': 1152.2342925371452, 'init_value': -422.95587158203125, 'ave_value': -147.11137415536354} step=10260
2022-04-20 19:34.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.39 [info     ] CQL_20220420193114: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00030035024497941225, 'time_algorithm_update': 0.017779343309458236, 'temp_loss': -0.7764984645951561, 'temp': 1.052296577489864, 'alpha_loss': 16.965362830468784, 'alpha': 0.40328605481755664, 'critic_loss': 3449.706548679642, 'actor_loss': 169.84695492571558, 'time_step': 0.018178248963160823, 'td_error': 1319.4921633624308, 'init_value': -448.5381774902344, 'ave_value': -160.28705730856} step=10602
2022-04-20 19:34.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.46 [info     ] CQL_20220420193114: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00029807272013167887, 'time_algorithm_update': 0.01757752337650946, 'temp_loss': -0.6734674592769285, 'temp': 1.067842782240862, 'alpha_loss': 16.57437197227924, 'alpha': 0.39257530075067665, 'critic_loss': 3632.1428736636512, 'actor_loss': 178.51411727994505, 'time_step': 0.017975763967859815, 'td_error': 1620.130204860594, 'init_value': -468.50390625, 'ave_value': -167.31252060674333} step=10944
2022-04-20 19:34.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.52 [info     ] CQL_20220420193114: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003036908936082271, 'time_algorithm_update': 0.017736647561279652, 'temp_loss': -0.6521095007723361, 'temp': 1.0824720075255947, 'alpha_loss': 16.68137486496864, 'alpha': 0.38141012949901715, 'critic_loss': 3816.005394651179, 'actor_loss': 187.38453138920298, 'time_step': 0.018136667229278744, 'td_error': 1433.3975472717598, 'init_value': -478.93682861328125, 'ave_value': -170.6770218189772} step=11286
2022-04-20 19:34.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:34.58 [info     ] CQL_20220420193114: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0002998887446888706, 'time_algorithm_update': 0.017686847357722055, 'temp_loss': -0.6537694540964059, 'temp': 1.0977783356493676, 'alpha_loss': 16.110466584824678, 'alpha': 0.37046984204074795, 'critic_loss': 3985.203639694125, 'actor_loss': 195.3855531034414, 'time_step': 0.018084718469987836, 'td_error': 1757.7362294594739, 'init_value': -509.14312744140625, 'ave_value': -184.46266586116843} step=11628
2022-04-20 19:34.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.05 [info     ] CQL_20220420193114: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00029919579712270995, 'time_algorithm_update': 0.017685433577375804, 'temp_loss': -0.7046512026361554, 'temp': 1.1138047143729806, 'alpha_loss': 17.088510516094185, 'alpha': 0.35917548024863527, 'critic_loss': 4154.448084424113, 'actor_loss': 204.02352244951572, 'time_step': 0.018081265583372953, 'td_error': 1826.6603856446, 'init_value': -525.2630004882812, 'ave_value': -191.97012525845219} step=11970
2022-04-20 19:35.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.11 [info     ] CQL_20220420193114: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000303253095749526, 'time_algorithm_update': 0.017795256703917742, 'temp_loss': -0.8014544852796877, 'temp': 1.1322698098177102, 'alpha_loss': 16.45306134084512, 'alpha': 0.347763288177942, 'critic_loss': 4309.30105680053, 'actor_loss': 211.5584255910059, 'time_step': 0.01819575181481434, 'td_error': 1451.7850609698949, 'init_value': -533.0519409179688, 'ave_value': -192.68505057388597} step=12312
2022-04-20 19:35.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.18 [info     ] CQL_20220420193114: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003036978649117096, 'time_algorithm_update': 0.01777925059112192, 'temp_loss': -0.8824197143338054, 'temp': 1.153006030453576, 'alpha_loss': 12.293564541298046, 'alpha': 0.33835275392783315, 'critic_loss': 4398.53626344915, 'actor_loss': 214.3514187907615, 'time_step': 0.018182750333819473, 'td_error': 1430.1943116006078, 'init_value': -547.9183349609375, 'ave_value': -197.27645698105968} step=12654
2022-04-20 19:35.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.24 [info     ] CQL_20220420193114: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0002996566002829033, 'time_algorithm_update': 0.017684879358748944, 'temp_loss': -0.7106193714715235, 'temp': 1.1706782523651569, 'alpha_loss': 12.310096892697072, 'alpha': 0.32971358072688006, 'critic_loss': 4491.421278925667, 'actor_loss': 218.01027674981725, 'time_step': 0.018083925832781875, 'td_error': 1531.1134197538327, 'init_value': -558.1725463867188, 'ave_value': -201.09442079798595} step=12996
2022-04-20 19:35.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.31 [info     ] CQL_20220420193114: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00030666973158630014, 'time_algorithm_update': 0.017649265757778233, 'temp_loss': -0.6764332044608238, 'temp': 1.187036348713769, 'alpha_loss': 12.100996778722395, 'alpha': 0.3209561913856986, 'critic_loss': 4580.947281329953, 'actor_loss': 222.43646690859433, 'time_step': 0.018058275618748357, 'td_error': 1608.5628746073173, 'init_value': -573.4705200195312, 'ave_value': -203.36361853454565} step=13338
2022-04-20 19:35.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.37 [info     ] CQL_20220420193114: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00030294635839629594, 'time_algorithm_update': 0.01787347263760037, 'temp_loss': -0.6306546699168564, 'temp': 1.2028341167851497, 'alpha_loss': 11.784939824489125, 'alpha': 0.3120491626667, 'critic_loss': 4673.280148511742, 'actor_loss': 227.20136617917066, 'time_step': 0.01827489353759944, 'td_error': 1629.8754916148173, 'init_value': -580.8453369140625, 'ave_value': -206.45171436224973} step=13680
2022-04-20 19:35.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.44 [info     ] CQL_20220420193114: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00030388609010573717, 'time_algorithm_update': 0.017825095974213896, 'temp_loss': -0.5996047102503086, 'temp': 1.2184987144860608, 'alpha_loss': 11.660496110804598, 'alpha': 0.30317694323453287, 'critic_loss': 4752.891787880345, 'actor_loss': 230.8121453000788, 'time_step': 0.01822645692100302, 'td_error': 1681.8814658373522, 'init_value': -606.961669921875, 'ave_value': -217.58175419601235} step=14022
2022-04-20 19:35.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.50 [info     ] CQL_20220420193114: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0002987775189137598, 'time_algorithm_update': 0.017790235971149645, 'temp_loss': -0.5491074660019568, 'temp': 1.2335453368069833, 'alpha_loss': 11.141375766162984, 'alpha': 0.2944117689516112, 'critic_loss': 4835.0805863943715, 'actor_loss': 235.0088157430727, 'time_step': 0.018183754201520953, 'td_error': 1644.266508156641, 'init_value': -613.7918701171875, 'ave_value': -216.6826294593768} step=14364
2022-04-20 19:35.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:35.57 [info     ] CQL_20220420193114: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00030110314575552245, 'time_algorithm_update': 0.018004847548858463, 'temp_loss': -0.5229591083064763, 'temp': 1.2481818349040739, 'alpha_loss': 10.956253100556937, 'alpha': 0.28580900103028056, 'critic_loss': 4911.2801513671875, 'actor_loss': 238.59314919633474, 'time_step': 0.018405857839082416, 'td_error': 1707.3009041636496, 'init_value': -625.4664306640625, 'ave_value': -222.2056346627506} step=14706
2022-04-20 19:35.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.03 [info     ] CQL_20220420193114: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003038930614092197, 'time_algorithm_update': 0.017938050610280178, 'temp_loss': -0.47999539586840667, 'temp': 1.2634105762543038, 'alpha_loss': 10.746439986758762, 'alpha': 0.2773436612901632, 'critic_loss': 4987.38313159608, 'actor_loss': 242.59283911275585, 'time_step': 0.018342978773061295, 'td_error': 1699.0338233645884, 'init_value': -636.0615844726562, 'ave_value': -224.18551708159146} step=15048
2022-04-20 19:36.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.10 [info     ] CQL_20220420193114: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00030264240956445884, 'time_algorithm_update': 0.017836669035125197, 'temp_loss': -0.474621614266993, 'temp': 1.2783730995585347, 'alpha_loss': 10.434955196771009, 'alpha': 0.26903900303687267, 'critic_loss': 5059.810471205684, 'actor_loss': 246.20008118389643, 'time_step': 0.018237182968541196, 'td_error': 1839.287446678074, 'init_value': -648.8897705078125, 'ave_value': -226.98689579587798} step=15390
2022-04-20 19:36.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.16 [info     ] CQL_20220420193114: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00030045342027095324, 'time_algorithm_update': 0.017825433385302448, 'temp_loss': -0.4291379797593718, 'temp': 1.2927199190820169, 'alpha_loss': 10.164509371707315, 'alpha': 0.2609361048504623, 'critic_loss': 5136.15727938825, 'actor_loss': 250.3366855375948, 'time_step': 0.018227197970563207, 'td_error': 1756.1425219203807, 'init_value': -658.6212158203125, 'ave_value': -231.89475458046337} step=15732
2022-04-20 19:36.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.23 [info     ] CQL_20220420193114: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003072002477813185, 'time_algorithm_update': 0.018294170585989256, 'temp_loss': -0.4642268386587762, 'temp': 1.3077577837029395, 'alpha_loss': 9.8455572337435, 'alpha': 0.25308053822893845, 'critic_loss': 5208.714968675758, 'actor_loss': 253.67854362621642, 'time_step': 0.018700110284905685, 'td_error': 1757.577318225701, 'init_value': -671.6554565429688, 'ave_value': -236.08954060614647} step=16074
2022-04-20 19:36.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.29 [info     ] CQL_20220420193114: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00030837839806986136, 'time_algorithm_update': 0.01803003207981935, 'temp_loss': -0.36123018643181576, 'temp': 1.3217208936200504, 'alpha_loss': 9.438207993033336, 'alpha': 0.2455989228703125, 'critic_loss': 5284.050091659814, 'actor_loss': 257.7303537290696, 'time_step': 0.018439614284805388, 'td_error': 1778.0407720222872, 'init_value': -688.8836059570312, 'ave_value': -241.3263471204633} step=16416
2022-04-20 19:36.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.36 [info     ] CQL_20220420193114: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00030530823601616756, 'time_algorithm_update': 0.017758024366278397, 'temp_loss': -0.274090896671017, 'temp': 1.3329162548857125, 'alpha_loss': 9.122523845984922, 'alpha': 0.23833746406418538, 'critic_loss': 5364.1096391287465, 'actor_loss': 261.65464921025506, 'time_step': 0.018160845104016755, 'td_error': 1828.448928620626, 'init_value': -700.9617919921875, 'ave_value': -245.03822095244854} step=16758
2022-04-20 19:36.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:36.42 [info     ] CQL_20220420193114: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00030702945084599726, 'time_algorithm_update': 0.017827451577660632, 'temp_loss': -0.30035178335002294, 'temp': 1.3449209639203479, 'alpha_loss': 8.692483738849038, 'alpha': 0.23125968346295997, 'critic_loss': 5433.983242815698, 'actor_loss': 265.2275131404051, 'time_step': 0.0182320221125731, 'td_error': 1846.6625140956253, 'init_value': -704.9680786132812, 'ave_value': -242.50384259163798} step=17100
2022-04-20 19:36.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193114/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:36.43 [info     ] FQE_20220420193642: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015267406601503672, 'time_algorithm_update': 0.0020658136850380034, 'loss': 0.008633861657658434, 'time_step': 0.0022892750889421947, 'init_value': 0.14522193372249603, 'ave_value': 0.1713168218845149, 'soft_opc': nan} step=166




2022-04-20 19:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.43 [info     ] FQE_20220420193642: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014976421034479715, 'time_algorithm_update': 0.002021944666483316, 'loss': 0.006431639280046774, 'time_step': 0.00224052997956793, 'init_value': 0.006995960138738155, 'ave_value': 0.07918820113199554, 'soft_opc': nan} step=332




2022-04-20 19:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.44 [info     ] FQE_20220420193642: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001528076378695936, 'time_algorithm_update': 0.0020712312445583113, 'loss': 0.005945592417370483, 'time_step': 0.002292755138443177, 'init_value': -0.0552268885076046, 'ave_value': 0.03618967536677447, 'soft_opc': nan} step=498




2022-04-20 19:36.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.44 [info     ] FQE_20220420193642: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001533534153398261, 'time_algorithm_update': 0.0021426763879247458, 'loss': 0.00618467325378344, 'time_step': 0.0023656149944627143, 'init_value': -0.138878732919693, 'ave_value': -0.013880318028197901, 'soft_opc': nan} step=664




2022-04-20 19:36.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.44 [info     ] FQE_20220420193642: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015301589506218233, 'time_algorithm_update': 0.0021023233252835563, 'loss': 0.005745167534024152, 'time_step': 0.0023192641246749693, 'init_value': -0.19979935884475708, 'ave_value': -0.03955397152358683, 'soft_opc': nan} step=830




2022-04-20 19:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.45 [info     ] FQE_20220420193642: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.000157834535621735, 'time_algorithm_update': 0.0021079936659479715, 'loss': 0.00547819495262825, 'time_step': 0.002336101359631642, 'init_value': -0.20673364400863647, 'ave_value': -0.04700664711428118, 'soft_opc': nan} step=996




2022-04-20 19:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.45 [info     ] FQE_20220420193642: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014853908354977527, 'time_algorithm_update': 0.002088466322565653, 'loss': 0.005334544507506113, 'time_step': 0.002304825438074319, 'init_value': -0.29531195759773254, 'ave_value': -0.11966664115478796, 'soft_opc': nan} step=1162




2022-04-20 19:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.46 [info     ] FQE_20220420193642: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015012614698295133, 'time_algorithm_update': 0.0020659113504800453, 'loss': 0.005035138616046633, 'time_step': 0.0022826812353478857, 'init_value': -0.3446720838546753, 'ave_value': -0.1399673348171049, 'soft_opc': nan} step=1328




2022-04-20 19:36.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.46 [info     ] FQE_20220420193642: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015131680362195853, 'time_algorithm_update': 0.0020363661180059596, 'loss': 0.005207846730846508, 'time_step': 0.0022545492792704017, 'init_value': -0.38377976417541504, 'ave_value': -0.18064269010220899, 'soft_opc': nan} step=1494




2022-04-20 19:36.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.47 [info     ] FQE_20220420193642: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001556729695883142, 'time_algorithm_update': 0.0020997624799429654, 'loss': 0.004832170222021909, 'time_step': 0.002322428197745817, 'init_value': -0.4555789828300476, 'ave_value': -0.23927445397922048, 'soft_opc': nan} step=1660




2022-04-20 19:36.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.47 [info     ] FQE_20220420193642: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015893758061420485, 'time_algorithm_update': 0.002103416316480522, 'loss': 0.004584908928632377, 'time_step': 0.0023291312068341725, 'init_value': -0.48366445302963257, 'ave_value': -0.2573434068573857, 'soft_opc': nan} step=1826




2022-04-20 19:36.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.47 [info     ] FQE_20220420193642: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015420080667518708, 'time_algorithm_update': 0.0020593462220157483, 'loss': 0.00435852464807433, 'time_step': 0.00228050817926246, 'init_value': -0.5026191473007202, 'ave_value': -0.2629003970292271, 'soft_opc': nan} step=1992




2022-04-20 19:36.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.48 [info     ] FQE_20220420193642: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001507308109697089, 'time_algorithm_update': 0.0021044317498264543, 'loss': 0.0046304525451240945, 'time_step': 0.0023212993001363365, 'init_value': -0.6099022626876831, 'ave_value': -0.3571917020661236, 'soft_opc': nan} step=2158




2022-04-20 19:36.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.48 [info     ] FQE_20220420193642: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015768085617616953, 'time_algorithm_update': 0.002163000853664904, 'loss': 0.004712151930841382, 'time_step': 0.0023924428296376423, 'init_value': -0.6927217245101929, 'ave_value': -0.4308452190143299, 'soft_opc': nan} step=2324




2022-04-20 19:36.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.49 [info     ] FQE_20220420193642: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015155378594455948, 'time_algorithm_update': 0.0020409908639379293, 'loss': 0.004742327625919358, 'time_step': 0.002258704369326672, 'init_value': -0.7268054485321045, 'ave_value': -0.4636613426826525, 'soft_opc': nan} step=2490




2022-04-20 19:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.49 [info     ] FQE_20220420193642: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015145468424601727, 'time_algorithm_update': 0.0020895822938666285, 'loss': 0.004995487736964442, 'time_step': 0.0023117596844592727, 'init_value': -0.786812961101532, 'ave_value': -0.4850052255075761, 'soft_opc': nan} step=2656




2022-04-20 19:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.50 [info     ] FQE_20220420193642: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015620725700654178, 'time_algorithm_update': 0.00208075793392687, 'loss': 0.005350517099218286, 'time_step': 0.0023035687136362835, 'init_value': -0.8754722476005554, 'ave_value': -0.5479897408326668, 'soft_opc': nan} step=2822




2022-04-20 19:36.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.50 [info     ] FQE_20220420193642: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015072362968720585, 'time_algorithm_update': 0.002036855881472668, 'loss': 0.005538257940655507, 'time_step': 0.0022578454879393062, 'init_value': -0.9192208051681519, 'ave_value': -0.5722170900096194, 'soft_opc': nan} step=2988




2022-04-20 19:36.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.51 [info     ] FQE_20220420193642: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001557246748223362, 'time_algorithm_update': 0.002090623579829572, 'loss': 0.0063690500255359946, 'time_step': 0.002318270235176546, 'init_value': -1.0283658504486084, 'ave_value': -0.6438185745195762, 'soft_opc': nan} step=3154




2022-04-20 19:36.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.51 [info     ] FQE_20220420193642: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015485717589596668, 'time_algorithm_update': 0.002113751618258924, 'loss': 0.006336529213228229, 'time_step': 0.0023365308003253245, 'init_value': -1.1445354223251343, 'ave_value': -0.6929218345573424, 'soft_opc': nan} step=3320




2022-04-20 19:36.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.51 [info     ] FQE_20220420193642: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015146186552852033, 'time_algorithm_update': 0.002055870481284268, 'loss': 0.0066989963111209585, 'time_step': 0.0022742978061538145, 'init_value': -1.2204241752624512, 'ave_value': -0.7617494584564622, 'soft_opc': nan} step=3486




2022-04-20 19:36.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.52 [info     ] FQE_20220420193642: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014941376375864787, 'time_algorithm_update': 0.001978917294237987, 'loss': 0.007187900721819241, 'time_step': 0.002197606017790645, 'init_value': -1.3144276142120361, 'ave_value': -0.8094554333234424, 'soft_opc': nan} step=3652




2022-04-20 19:36.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.52 [info     ] FQE_20220420193642: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015567871461431664, 'time_algorithm_update': 0.002102537327502147, 'loss': 0.007852720316213343, 'time_step': 0.00232522889792201, 'init_value': -1.4991576671600342, 'ave_value': -0.9208502758676103, 'soft_opc': nan} step=3818




2022-04-20 19:36.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.53 [info     ] FQE_20220420193642: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001524428287184382, 'time_algorithm_update': 0.0020934975290872963, 'loss': 0.00838291402260986, 'time_step': 0.0023158917944115327, 'init_value': -1.518665075302124, 'ave_value': -0.9187802506791143, 'soft_opc': nan} step=3984




2022-04-20 19:36.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.53 [info     ] FQE_20220420193642: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015494335128600338, 'time_algorithm_update': 0.002003004752009748, 'loss': 0.009298061497554362, 'time_step': 0.0022279656077005775, 'init_value': -1.5841351747512817, 'ave_value': -0.9689795477890518, 'soft_opc': nan} step=4150




2022-04-20 19:36.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.53 [info     ] FQE_20220420193642: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015185539980968796, 'time_algorithm_update': 0.002086186983499182, 'loss': 0.009685367838120514, 'time_step': 0.002310678183314312, 'init_value': -1.7846919298171997, 'ave_value': -1.070795903336005, 'soft_opc': nan} step=4316




2022-04-20 19:36.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.54 [info     ] FQE_20220420193642: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015615555177251976, 'time_algorithm_update': 0.0020850997373282193, 'loss': 0.009976504334102732, 'time_step': 0.00231257547815162, 'init_value': -1.8012096881866455, 'ave_value': -1.0590056036049416, 'soft_opc': nan} step=4482




2022-04-20 19:36.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.54 [info     ] FQE_20220420193642: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015411463128515036, 'time_algorithm_update': 0.002106696726327919, 'loss': 0.011326070466774505, 'time_step': 0.002326550253902573, 'init_value': -1.8868639469146729, 'ave_value': -1.0934400354591745, 'soft_opc': nan} step=4648




2022-04-20 19:36.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.55 [info     ] FQE_20220420193642: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001567788870937853, 'time_algorithm_update': 0.002087338861212673, 'loss': 0.011775436156838908, 'time_step': 0.002310856279120388, 'init_value': -2.0776281356811523, 'ave_value': -1.2391999464641485, 'soft_opc': nan} step=4814




2022-04-20 19:36.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.55 [info     ] FQE_20220420193642: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015535842941468022, 'time_algorithm_update': 0.002102511474885136, 'loss': 0.012150847083041781, 'time_step': 0.002326288855219462, 'init_value': -2.1092560291290283, 'ave_value': -1.2407161141183596, 'soft_opc': nan} step=4980




2022-04-20 19:36.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.56 [info     ] FQE_20220420193642: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015280045658709054, 'time_algorithm_update': 0.002066915293773973, 'loss': 0.013139957668686128, 'time_step': 0.002287952296705131, 'init_value': -2.1444077491760254, 'ave_value': -1.2530350559047079, 'soft_opc': nan} step=5146




2022-04-20 19:36.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.56 [info     ] FQE_20220420193642: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015429703586072806, 'time_algorithm_update': 0.002104899969445654, 'loss': 0.013777649957905468, 'time_step': 0.002330357769885695, 'init_value': -2.319209337234497, 'ave_value': -1.3560377256151892, 'soft_opc': nan} step=5312




2022-04-20 19:36.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.57 [info     ] FQE_20220420193642: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015570313097482705, 'time_algorithm_update': 0.0021278987447899507, 'loss': 0.014514786721465278, 'time_step': 0.002354119197431817, 'init_value': -2.3504014015197754, 'ave_value': -1.320659349692633, 'soft_opc': nan} step=5478




2022-04-20 19:36.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.57 [info     ] FQE_20220420193642: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015791496598576926, 'time_algorithm_update': 0.0020854674189923757, 'loss': 0.015406746245466217, 'time_step': 0.0023112426321190522, 'init_value': -2.598416328430176, 'ave_value': -1.4709522430822757, 'soft_opc': nan} step=5644




2022-04-20 19:36.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.57 [info     ] FQE_20220420193642: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001549562775945089, 'time_algorithm_update': 0.00211317855191518, 'loss': 0.016308886390347707, 'time_step': 0.0023356403212949454, 'init_value': -2.7691447734832764, 'ave_value': -1.6082162401596973, 'soft_opc': nan} step=5810




2022-04-20 19:36.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.58 [info     ] FQE_20220420193642: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015382450747202677, 'time_algorithm_update': 0.002083437988557011, 'loss': 0.01806291418727936, 'time_step': 0.002306441226637507, 'init_value': -2.8049659729003906, 'ave_value': -1.6051810073020223, 'soft_opc': nan} step=5976




2022-04-20 19:36.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.58 [info     ] FQE_20220420193642: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015570600348782827, 'time_algorithm_update': 0.0021228732832943103, 'loss': 0.018484346633805073, 'time_step': 0.002349775957773967, 'init_value': -2.823422908782959, 'ave_value': -1.6490683543252516, 'soft_opc': nan} step=6142




2022-04-20 19:36.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.59 [info     ] FQE_20220420193642: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015152218830154603, 'time_algorithm_update': 0.0020476349865097597, 'loss': 0.019938241836267065, 'time_step': 0.002267003059387207, 'init_value': -2.908134937286377, 'ave_value': -1.654075267212885, 'soft_opc': nan} step=6308




2022-04-20 19:36.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:36.59 [info     ] FQE_20220420193642: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015642125922513296, 'time_algorithm_update': 0.002127256738134177, 'loss': 0.021164080310410942, 'time_step': 0.002355199262320277, 'init_value': -3.0410780906677246, 'ave_value': -1.6802243872168097, 'soft_opc': nan} step=6474




2022-04-20 19:36.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.00 [info     ] FQE_20220420193642: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001543731574552605, 'time_algorithm_update': 0.00211606542748141, 'loss': 0.02181893867210496, 'time_step': 0.0023373035063226537, 'init_value': -3.1692280769348145, 'ave_value': -1.7752644952420178, 'soft_opc': nan} step=6640




2022-04-20 19:37.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.00 [info     ] FQE_20220420193642: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015164857887359987, 'time_algorithm_update': 0.002075417932257595, 'loss': 0.02225535844750029, 'time_step': 0.002294294805411833, 'init_value': -3.191697120666504, 'ave_value': -1.7575336160624886, 'soft_opc': nan} step=6806




2022-04-20 19:37.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.00 [info     ] FQE_20220420193642: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.000154578542134848, 'time_algorithm_update': 0.0020963212093674994, 'loss': 0.02322023246137156, 'time_step': 0.0023196260613131234, 'init_value': -3.2829630374908447, 'ave_value': -1.8070312753621791, 'soft_opc': nan} step=6972




2022-04-20 19:37.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.01 [info     ] FQE_20220420193642: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001554647123957255, 'time_algorithm_update': 0.0020509886454386883, 'loss': 0.024774806944271886, 'time_step': 0.0022761965372476234, 'init_value': -3.319382905960083, 'ave_value': -1.8617620414173282, 'soft_opc': nan} step=7138




2022-04-20 19:37.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.01 [info     ] FQE_20220420193642: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015420511544468892, 'time_algorithm_update': 0.0021058737513530686, 'loss': 0.024832077654000057, 'time_step': 0.0023290134338011226, 'init_value': -3.419494152069092, 'ave_value': -1.9422097973055668, 'soft_opc': nan} step=7304




2022-04-20 19:37.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.02 [info     ] FQE_20220420193642: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015713364244943642, 'time_algorithm_update': 0.0020718114921845585, 'loss': 0.026148802858878332, 'time_step': 0.0022976441555712596, 'init_value': -3.496115207672119, 'ave_value': -1.957082944665406, 'soft_opc': nan} step=7470




2022-04-20 19:37.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.02 [info     ] FQE_20220420193642: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015617853187652956, 'time_algorithm_update': 0.00212568978229201, 'loss': 0.027204338569861715, 'time_step': 0.0023511102400630354, 'init_value': -3.629373788833618, 'ave_value': -2.083231230946006, 'soft_opc': nan} step=7636




2022-04-20 19:37.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.03 [info     ] FQE_20220420193642: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015083565769425357, 'time_algorithm_update': 0.0021207691675209136, 'loss': 0.028802126072380828, 'time_step': 0.0023397063634481774, 'init_value': -3.8707752227783203, 'ave_value': -2.2497785291022008, 'soft_opc': nan} step=7802




2022-04-20 19:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.03 [info     ] FQE_20220420193642: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015192721263471856, 'time_algorithm_update': 0.0020608270024678795, 'loss': 0.030905534280464053, 'time_step': 0.002277815198323813, 'init_value': -3.881148338317871, 'ave_value': -2.2041419139719225, 'soft_opc': nan} step=7968




2022-04-20 19:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.03 [info     ] FQE_20220420193642: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016062661825892437, 'time_algorithm_update': 0.002091447991060923, 'loss': 0.031425683757750296, 'time_step': 0.0023212978638798357, 'init_value': -4.022101402282715, 'ave_value': -2.3482570007443426, 'soft_opc': nan} step=8134




2022-04-20 19:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:37.04 [info     ] FQE_20220420193642: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015509990324457008, 'time_algorithm_update': 0.0020950630486729635, 'loss': 0.03230449860825776, 'time_step': 0.0023186522794057087, 'init_value': -3.9675867557525635, 'ave_value': -2.357699254235706, 'soft_opc': nan} step=8300




2022-04-20 19:37.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193642/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 19:37.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220420193704
2022-04-20 19:37.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:37.04 [debug    ] Building models...
2022-04-20 19:37.04 [debug    ] Models have been built.
2022-04-20 19:37.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420193704/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:37.05 [info     ] FQE_20220420193704: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001577554747115734, 'time_algorithm_update': 0.0020894151787425198, 'loss': 0.024184481154174306, 'time_step': 0.002319414255230926, 'init_value': -1.0178735256195068, 'ave_value': -1.0391173486304175, 'soft_opc': nan} step=344




2022-04-20 19:37.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.06 [info     ] FQE_20220420193704: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015240561130434969, 'time_algorithm_update': 0.0020641823147618493, 'loss': 0.02123495487788649, 'time_step': 0.0022843542487122294, 'init_value': -1.8069953918457031, 'ave_value': -1.8216839736243626, 'soft_opc': nan} step=688




2022-04-20 19:37.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.07 [info     ] FQE_20220420193704: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015291779540305915, 'time_algorithm_update': 0.0020519446494967437, 'loss': 0.02559258148269078, 'time_step': 0.002271357663842135, 'init_value': -2.816629409790039, 'ave_value': -2.8427233101548377, 'soft_opc': nan} step=1032




2022-04-20 19:37.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.08 [info     ] FQE_20220420193704: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015227808508762094, 'time_algorithm_update': 0.002058250959529433, 'loss': 0.02859820015349447, 'time_step': 0.0022785351719967154, 'init_value': -3.503744125366211, 'ave_value': -3.5838691595706855, 'soft_opc': nan} step=1376




2022-04-20 19:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.08 [info     ] FQE_20220420193704: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015292680540750193, 'time_algorithm_update': 0.0020690775194833447, 'loss': 0.03357730272529257, 'time_step': 0.002291969781698183, 'init_value': -4.396990776062012, 'ave_value': -4.604641621767938, 'soft_opc': nan} step=1720




2022-04-20 19:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.09 [info     ] FQE_20220420193704: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001521256080893583, 'time_algorithm_update': 0.0020656024300774864, 'loss': 0.0402327929785865, 'time_step': 0.0022875881472299267, 'init_value': -4.934751510620117, 'ave_value': -5.296206795847094, 'soft_opc': nan} step=2064




2022-04-20 19:37.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.10 [info     ] FQE_20220420193704: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001529905685158663, 'time_algorithm_update': 0.002076077599858129, 'loss': 0.04965834950400127, 'time_step': 0.002297876186149065, 'init_value': -5.705668926239014, 'ave_value': -6.287053279095405, 'soft_opc': nan} step=2408




2022-04-20 19:37.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.11 [info     ] FQE_20220420193704: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015806042870809866, 'time_algorithm_update': 0.002130282479663228, 'loss': 0.05972782213533254, 'time_step': 0.0023601477922395218, 'init_value': -6.007052898406982, 'ave_value': -6.824915342261125, 'soft_opc': nan} step=2752




2022-04-20 19:37.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.12 [info     ] FQE_20220420193704: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015796339789102244, 'time_algorithm_update': 0.0020801757657250694, 'loss': 0.07070159202246645, 'time_step': 0.0023117100083550743, 'init_value': -6.455536365509033, 'ave_value': -7.5327550568559145, 'soft_opc': nan} step=3096




2022-04-20 19:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.13 [info     ] FQE_20220420193704: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015563119289486906, 'time_algorithm_update': 0.002053628827250281, 'loss': 0.08423680239248761, 'time_step': 0.002279542213262514, 'init_value': -6.992153644561768, 'ave_value': -8.383388075742635, 'soft_opc': nan} step=3440




2022-04-20 19:37.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.14 [info     ] FQE_20220420193704: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015566792598990507, 'time_algorithm_update': 0.0021010207575421, 'loss': 0.09804821196066346, 'time_step': 0.002326671467270962, 'init_value': -7.352685928344727, 'ave_value': -8.999541239037708, 'soft_opc': nan} step=3784




2022-04-20 19:37.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.15 [info     ] FQE_20220420193704: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016020966130633686, 'time_algorithm_update': 0.002093387897624526, 'loss': 0.1145149291341388, 'time_step': 0.0023227049860843393, 'init_value': -7.983017444610596, 'ave_value': -9.94935726548906, 'soft_opc': nan} step=4128




2022-04-20 19:37.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.15 [info     ] FQE_20220420193704: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015388255895570267, 'time_algorithm_update': 0.0020634559697883074, 'loss': 0.12907775062093035, 'time_step': 0.0022855602031530337, 'init_value': -8.528961181640625, 'ave_value': -10.758391327860656, 'soft_opc': nan} step=4472




2022-04-20 19:37.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.16 [info     ] FQE_20220420193704: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001560962477395701, 'time_algorithm_update': 0.002043184845946556, 'loss': 0.1474258988056072, 'time_step': 0.0022691398165946785, 'init_value': -9.119182586669922, 'ave_value': -11.643734003831675, 'soft_opc': nan} step=4816




2022-04-20 19:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.17 [info     ] FQE_20220420193704: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001531076985736226, 'time_algorithm_update': 0.002043409402980361, 'loss': 0.16094969853022417, 'time_step': 0.002264058174088944, 'init_value': -9.264627456665039, 'ave_value': -11.987537657409101, 'soft_opc': nan} step=5160




2022-04-20 19:37.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.18 [info     ] FQE_20220420193704: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015597357306369516, 'time_algorithm_update': 0.0020469953847485917, 'loss': 0.17461730985696491, 'time_step': 0.0022738277912139893, 'init_value': -10.01553726196289, 'ave_value': -12.889014907637694, 'soft_opc': nan} step=5504




2022-04-20 19:37.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.19 [info     ] FQE_20220420193704: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015543435895165733, 'time_algorithm_update': 0.0020705017932625704, 'loss': 0.19458216538674436, 'time_step': 0.002296733994816625, 'init_value': -10.388198852539062, 'ave_value': -13.455901966586307, 'soft_opc': nan} step=5848




2022-04-20 19:37.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.20 [info     ] FQE_20220420193704: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015753368998682775, 'time_algorithm_update': 0.0020565958910210187, 'loss': 0.20566875700362372, 'time_step': 0.0022841019685878312, 'init_value': -10.83389663696289, 'ave_value': -13.978316880104778, 'soft_opc': nan} step=6192




2022-04-20 19:37.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.21 [info     ] FQE_20220420193704: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015389572742373445, 'time_algorithm_update': 0.0020610128724297813, 'loss': 0.21935680486413456, 'time_step': 0.002286746058353158, 'init_value': -11.040203094482422, 'ave_value': -14.418064936216886, 'soft_opc': nan} step=6536




2022-04-20 19:37.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.21 [info     ] FQE_20220420193704: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015491593715756438, 'time_algorithm_update': 0.002043853665507117, 'loss': 0.22834966324681286, 'time_step': 0.002268357332362685, 'init_value': -11.17485523223877, 'ave_value': -14.709458378440624, 'soft_opc': nan} step=6880




2022-04-20 19:37.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.22 [info     ] FQE_20220420193704: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001543940499771473, 'time_algorithm_update': 0.002092200656269872, 'loss': 0.2305760550485967, 'time_step': 0.002317959486052047, 'init_value': -11.338672637939453, 'ave_value': -15.062499316358888, 'soft_opc': nan} step=7224




2022-04-20 19:37.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.23 [info     ] FQE_20220420193704: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001585213250892107, 'time_algorithm_update': 0.0021154000315555307, 'loss': 0.23399934838092779, 'time_step': 0.002343228390050489, 'init_value': -11.575057029724121, 'ave_value': -15.418709257889438, 'soft_opc': nan} step=7568




2022-04-20 19:37.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.24 [info     ] FQE_20220420193704: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015664724416511003, 'time_algorithm_update': 0.002067010763079621, 'loss': 0.23989102190230474, 'time_step': 0.0022949978362682253, 'init_value': -11.851709365844727, 'ave_value': -15.817887502089814, 'soft_opc': nan} step=7912




2022-04-20 19:37.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.25 [info     ] FQE_20220420193704: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015785527783770893, 'time_algorithm_update': 0.0020744953044625216, 'loss': 0.24977462289876542, 'time_step': 0.0023026251515676807, 'init_value': -12.094566345214844, 'ave_value': -16.214964363046057, 'soft_opc': nan} step=8256




2022-04-20 19:37.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.26 [info     ] FQE_20220420193704: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015391859897347383, 'time_algorithm_update': 0.0020569528258124062, 'loss': 0.25714933005876317, 'time_step': 0.0022810454978499303, 'init_value': -12.466540336608887, 'ave_value': -16.635405143142282, 'soft_opc': nan} step=8600




2022-04-20 19:37.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.27 [info     ] FQE_20220420193704: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015620991241100223, 'time_algorithm_update': 0.002071510913760163, 'loss': 0.2629849909490711, 'time_step': 0.002297659252965173, 'init_value': -12.95054817199707, 'ave_value': -17.180400282319063, 'soft_opc': nan} step=8944




2022-04-20 19:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.27 [info     ] FQE_20220420193704: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015648367793061013, 'time_algorithm_update': 0.0020927204642184945, 'loss': 0.2715620285679781, 'time_step': 0.00232253864754078, 'init_value': -13.26933479309082, 'ave_value': -17.550640904191916, 'soft_opc': nan} step=9288




2022-04-20 19:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.28 [info     ] FQE_20220420193704: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016027342441470125, 'time_algorithm_update': 0.0020956431710442832, 'loss': 0.28299131933708005, 'time_step': 0.0023276071215784827, 'init_value': -13.742799758911133, 'ave_value': -18.208770424382642, 'soft_opc': nan} step=9632




2022-04-20 19:37.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.29 [info     ] FQE_20220420193704: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015375018119812012, 'time_algorithm_update': 0.0020893105240755304, 'loss': 0.2928496519655942, 'time_step': 0.002315352822459021, 'init_value': -13.66717529296875, 'ave_value': -18.290863861304683, 'soft_opc': nan} step=9976




2022-04-20 19:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.30 [info     ] FQE_20220420193704: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015593476073686466, 'time_algorithm_update': 0.002056129450021788, 'loss': 0.2931228018647363, 'time_step': 0.0022833264151284863, 'init_value': -13.88766098022461, 'ave_value': -18.492835150511414, 'soft_opc': nan} step=10320




2022-04-20 19:37.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.31 [info     ] FQE_20220420193704: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015590149302815282, 'time_algorithm_update': 0.0020673968071161313, 'loss': 0.30050975880260733, 'time_step': 0.002295440019563187, 'init_value': -13.965180397033691, 'ave_value': -18.7649773155381, 'soft_opc': nan} step=10664




2022-04-20 19:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.32 [info     ] FQE_20220420193704: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015817825184311977, 'time_algorithm_update': 0.00211407694705697, 'loss': 0.30893894894655016, 'time_step': 0.0023420834264089893, 'init_value': -14.387531280517578, 'ave_value': -19.161685680282545, 'soft_opc': nan} step=11008




2022-04-20 19:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.33 [info     ] FQE_20220420193704: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015753161075503328, 'time_algorithm_update': 0.00208459967790648, 'loss': 0.32358844811638254, 'time_step': 0.0023134025030357892, 'init_value': -14.655529022216797, 'ave_value': -19.336320141441114, 'soft_opc': nan} step=11352




2022-04-20 19:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.34 [info     ] FQE_20220420193704: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015647120253984317, 'time_algorithm_update': 0.002076067203699156, 'loss': 0.3349257721673957, 'time_step': 0.002303766649822856, 'init_value': -14.896957397460938, 'ave_value': -19.67589735839818, 'soft_opc': nan} step=11696




2022-04-20 19:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.34 [info     ] FQE_20220420193704: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.000157810227815495, 'time_algorithm_update': 0.002110262249791345, 'loss': 0.3586864586118175, 'time_step': 0.002341077771297721, 'init_value': -15.057473182678223, 'ave_value': -19.887659370013186, 'soft_opc': nan} step=12040




2022-04-20 19:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.35 [info     ] FQE_20220420193704: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015630417091901913, 'time_algorithm_update': 0.002070010401481806, 'loss': 0.3794921904878128, 'time_step': 0.0022981672786002937, 'init_value': -15.653050422668457, 'ave_value': -20.357598138526754, 'soft_opc': nan} step=12384




2022-04-20 19:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.36 [info     ] FQE_20220420193704: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001577138900756836, 'time_algorithm_update': 0.002029614393101182, 'loss': 0.3931395060891762, 'time_step': 0.0022580491942028667, 'init_value': -16.229785919189453, 'ave_value': -20.783217613868885, 'soft_opc': nan} step=12728




2022-04-20 19:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.37 [info     ] FQE_20220420193704: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015778943549755008, 'time_algorithm_update': 0.0020449618960535804, 'loss': 0.4100319349510205, 'time_step': 0.002274799485539281, 'init_value': -16.606882095336914, 'ave_value': -21.053296341364447, 'soft_opc': nan} step=13072




2022-04-20 19:37.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.38 [info     ] FQE_20220420193704: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015696328739787257, 'time_algorithm_update': 0.002126270255377126, 'loss': 0.42829615169009844, 'time_step': 0.0023533119711765024, 'init_value': -16.541107177734375, 'ave_value': -21.020126165060308, 'soft_opc': nan} step=13416




2022-04-20 19:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.39 [info     ] FQE_20220420193704: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015806666640348212, 'time_algorithm_update': 0.002082364503727403, 'loss': 0.4432385919539821, 'time_step': 0.0023130836874939676, 'init_value': -17.103750228881836, 'ave_value': -21.701369609972378, 'soft_opc': nan} step=13760




2022-04-20 19:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.40 [info     ] FQE_20220420193704: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015573723371638807, 'time_algorithm_update': 0.002087392779283745, 'loss': 0.47210297107068433, 'time_step': 0.0023160424343375272, 'init_value': -17.468643188476562, 'ave_value': -21.98859943049448, 'soft_opc': nan} step=14104




2022-04-20 19:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.40 [info     ] FQE_20220420193704: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015318047168642976, 'time_algorithm_update': 0.0020820061827814857, 'loss': 0.49935670205673505, 'time_step': 0.0023079348164935444, 'init_value': -17.65882110595703, 'ave_value': -22.19001743906253, 'soft_opc': nan} step=14448




2022-04-20 19:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.41 [info     ] FQE_20220420193704: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001536850319352261, 'time_algorithm_update': 0.002112456532411797, 'loss': 0.5064994529929272, 'time_step': 0.0023389243802358936, 'init_value': -17.34168243408203, 'ave_value': -21.95517122181686, 'soft_opc': nan} step=14792




2022-04-20 19:37.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.42 [info     ] FQE_20220420193704: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001542332560517067, 'time_algorithm_update': 0.0020456542802411454, 'loss': 0.5125197379979803, 'time_step': 0.0022697933884554132, 'init_value': -17.11724853515625, 'ave_value': -21.862313815011635, 'soft_opc': nan} step=15136




2022-04-20 19:37.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.43 [info     ] FQE_20220420193704: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015528049579886503, 'time_algorithm_update': 0.0020754545233970466, 'loss': 0.5315819914171169, 'time_step': 0.0023001349249551464, 'init_value': -17.328651428222656, 'ave_value': -21.90196610871736, 'soft_opc': nan} step=15480




2022-04-20 19:37.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.44 [info     ] FQE_20220420193704: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015803755715835925, 'time_algorithm_update': 0.0021312791247700535, 'loss': 0.5421961910513684, 'time_step': 0.002362227717111277, 'init_value': -17.325288772583008, 'ave_value': -21.68697821530136, 'soft_opc': nan} step=15824




2022-04-20 19:37.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.45 [info     ] FQE_20220420193704: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015457702237506245, 'time_algorithm_update': 0.0020808438922083655, 'loss': 0.5559580153298326, 'time_step': 0.0023064890573191088, 'init_value': -17.738719940185547, 'ave_value': -21.889388261722015, 'soft_opc': nan} step=16168




2022-04-20 19:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.46 [info     ] FQE_20220420193704: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015716358672740848, 'time_algorithm_update': 0.0021010152129239813, 'loss': 0.5853345610369214, 'time_step': 0.0023311979548875676, 'init_value': -17.99420928955078, 'ave_value': -22.016677160628205, 'soft_opc': nan} step=16512




2022-04-20 19:37.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.47 [info     ] FQE_20220420193704: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001566209072290465, 'time_algorithm_update': 0.002083040947137877, 'loss': 0.616022891473285, 'time_step': 0.002311571392902108, 'init_value': -18.82501983642578, 'ave_value': -22.586517094115955, 'soft_opc': nan} step=16856




2022-04-20 19:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:37.47 [info     ] FQE_20220420193704: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015683714733567348, 'time_algorithm_update': 0.002140783293302669, 'loss': 0.6452235697963556, 'time_step': 0.0023726931838102118, 'init_value': -19.297853469848633, 'ave_value': -22.942055908355627, 'soft_opc': nan} step=17200




2022-04-20 19:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420193704/model_17200.pt
search iteration:  29
using hyper params:  [0.003875639790014305, 0.008732700503256903, 3.065456889628557e-05, 5]
2022-04-20 19:37.47 [debug    ] RoundIterator is selected.
2022-04-20 19:37.47 [info     ] Directory is created at d3rlpy_logs/CQL_20220420193747
2022-04-20 19:37.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:37.47 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:37.47 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420193747/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.003875639790014305, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:37.54 [info     ] CQL_20220420193747: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00035008491828427676, 'time_algorithm_update': 0.017847175486603674, 'temp_loss': 4.634636440472296, 'temp': 0.9947732765423624, 'alpha_loss': -14.20201707722848, 'alpha': 1.0157767419229473, 'critic_loss': 32.45199165846172, 'actor_loss': 3.9248940756446435, 'time_step': 0.018295796293961376, 'td_error': 9.86899093122446, 'init_value': -10.975980758666992, 'ave_value': -6.802122986885879} step=342
2022-04-20 19:37.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.01 [info     ] CQL_20220420193747: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003554130855359529, 'time_algorithm_update': 0.01799901187071326, 'temp_loss': 3.8262760918042815, 'temp': 0.9849621799954197, 'alpha_loss': -4.53217339646398, 'alpha': 1.0360282722272371, 'critic_loss': 26.791663945069786, 'actor_loss': 10.370386847278528, 'time_step': 0.018453920793812178, 'td_error': 7.06166560760076, 'init_value': -19.568872451782227, 'ave_value': -11.371908423861003} step=684
2022-04-20 19:38.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.07 [info     ] CQL_20220420193747: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003530093800951863, 'time_algorithm_update': 0.017930321526109128, 'temp_loss': 2.8523138036504823, 'temp': 0.9767869517468569, 'alpha_loss': 0.3500389286360502, 'alpha': 1.042490823924193, 'critic_loss': 50.81196897071705, 'actor_loss': 16.749964042016636, 'time_step': 0.018381611645570277, 'td_error': 8.795840798195036, 'init_value': -28.2684268951416, 'ave_value': -16.009991529506742} step=1026
2022-04-20 19:38.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.14 [info     ] CQL_20220420193747: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003560432913707711, 'time_algorithm_update': 0.01793054669921161, 'temp_loss': 2.2802979005010506, 'temp': 0.9696280788957027, 'alpha_loss': 3.7542317997642427, 'alpha': 1.0321474559822974, 'critic_loss': 83.23260104307654, 'actor_loss': 23.041535210191157, 'time_step': 0.018384862364384167, 'td_error': 13.726807055510656, 'init_value': -37.76929473876953, 'ave_value': -20.887969348931367} step=1368
2022-04-20 19:38.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.20 [info     ] CQL_20220420193747: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003569669890822026, 'time_algorithm_update': 0.01812633784890872, 'temp_loss': 1.8479448433159387, 'temp': 0.9630412306004797, 'alpha_loss': 6.1238915460151535, 'alpha': 1.0052339413709808, 'critic_loss': 119.63172377201549, 'actor_loss': 29.209323464778432, 'time_step': 0.018582939404493185, 'td_error': 17.952457311218076, 'init_value': -47.9625358581543, 'ave_value': -28.712443064380068} step=1710
2022-04-20 19:38.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.27 [info     ] CQL_20220420193747: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035550789526331495, 'time_algorithm_update': 0.018070496313753185, 'temp_loss': 1.5573750793585304, 'temp': 0.9568864865261212, 'alpha_loss': 7.833755129262021, 'alpha': 0.9684653869497846, 'critic_loss': 158.56646168580528, 'actor_loss': 35.02627574072944, 'time_step': 0.018526937529357555, 'td_error': 25.010176777207363, 'init_value': -55.25347900390625, 'ave_value': -31.486686474995032} step=2052
2022-04-20 19:38.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.33 [info     ] CQL_20220420193747: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00035111248841759754, 'time_algorithm_update': 0.01801493363073695, 'temp_loss': 1.2864204136251707, 'temp': 0.9511186707089518, 'alpha_loss': 9.099706797571907, 'alpha': 0.929051278975972, 'critic_loss': 198.56666310628256, 'actor_loss': 40.73090150621202, 'time_step': 0.018463985264649867, 'td_error': 37.491983310655684, 'init_value': -64.94828796386719, 'ave_value': -36.33205165252761} step=2394
2022-04-20 19:38.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.40 [info     ] CQL_20220420193747: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003514798761111254, 'time_algorithm_update': 0.017914859872115284, 'temp_loss': 1.0446354865766407, 'temp': 0.9457312540004128, 'alpha_loss': 10.014919679764418, 'alpha': 0.8904783199405113, 'critic_loss': 244.0030066953068, 'actor_loss': 46.340515147872836, 'time_step': 0.018365783998143603, 'td_error': 37.12562516676466, 'init_value': -75.17979431152344, 'ave_value': -41.68248982884325} step=2736
2022-04-20 19:38.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.46 [info     ] CQL_20220420193747: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000361273163243344, 'time_algorithm_update': 0.017857585734094097, 'temp_loss': 0.8387073411971157, 'temp': 0.9407008330375828, 'alpha_loss': 10.907931400321381, 'alpha': 0.854401664601432, 'critic_loss': 293.7875445628027, 'actor_loss': 51.96026044700578, 'time_step': 0.018318506012185973, 'td_error': 47.20653425618268, 'init_value': -82.79581451416016, 'ave_value': -46.07671884898734} step=3078
2022-04-20 19:38.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.53 [info     ] CQL_20220420193747: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035822809788218715, 'time_algorithm_update': 0.01779927775176645, 'temp_loss': 0.6691347098860302, 'temp': 0.9362699166375991, 'alpha_loss': 11.328445376011363, 'alpha': 0.8200644167543155, 'critic_loss': 343.70990284702236, 'actor_loss': 57.135318265323754, 'time_step': 0.018258163803502134, 'td_error': 49.43617726930455, 'init_value': -90.63560485839844, 'ave_value': -50.39837618015035} step=3420
2022-04-20 19:38.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:38.59 [info     ] CQL_20220420193747: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00034987438491910523, 'time_algorithm_update': 0.01783338136840285, 'temp_loss': 0.5376560429209157, 'temp': 0.9321092957990211, 'alpha_loss': 11.816364882285136, 'alpha': 0.7887726319812195, 'critic_loss': 396.3878756517555, 'actor_loss': 62.28853300440381, 'time_step': 0.018284752354984394, 'td_error': 64.95523768388045, 'init_value': -97.45562744140625, 'ave_value': -54.3458789874761} step=3762
2022-04-20 19:38.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.06 [info     ] CQL_20220420193747: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003528316118563825, 'time_algorithm_update': 0.017796515024196335, 'temp_loss': 0.3947632310432736, 'temp': 0.9284914914976087, 'alpha_loss': 11.805381178158767, 'alpha': 0.7592912743314665, 'critic_loss': 445.4964455052426, 'actor_loss': 66.76159916704859, 'time_step': 0.018247101739136098, 'td_error': 87.72631124106891, 'init_value': -107.41178894042969, 'ave_value': -59.3072767509614} step=4104
2022-04-20 19:39.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.12 [info     ] CQL_20220420193747: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00035376646365338596, 'time_algorithm_update': 0.01789872200168364, 'temp_loss': 0.27595561458484125, 'temp': 0.9254184523869676, 'alpha_loss': 12.674641591066505, 'alpha': 0.7314403848341334, 'critic_loss': 497.81699723807, 'actor_loss': 71.44377203154983, 'time_step': 0.018352422100758693, 'td_error': 78.99321683406161, 'init_value': -113.60252380371094, 'ave_value': -63.15758711916772} step=4446
2022-04-20 19:39.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.19 [info     ] CQL_20220420193747: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003533286657946849, 'time_algorithm_update': 0.017725992621037, 'temp_loss': 0.2567565782834389, 'temp': 0.9228633994247482, 'alpha_loss': 11.940829922581276, 'alpha': 0.7045481623264781, 'critic_loss': 554.2430561801843, 'actor_loss': 75.77964300858348, 'time_step': 0.01817752464472899, 'td_error': 79.46374130129388, 'init_value': -120.9295425415039, 'ave_value': -66.42802642474572} step=4788
2022-04-20 19:39.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.25 [info     ] CQL_20220420193747: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003471472109967505, 'time_algorithm_update': 0.017670970911171004, 'temp_loss': 0.1477261577073855, 'temp': 0.9202164481257835, 'alpha_loss': 12.748153693494741, 'alpha': 0.6805695805981843, 'critic_loss': 603.5077159502353, 'actor_loss': 79.67567176149602, 'time_step': 0.018116144408956605, 'td_error': 84.87668841489914, 'init_value': -127.6392822265625, 'ave_value': -69.90907071992889} step=5130
2022-04-20 19:39.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.32 [info     ] CQL_20220420193747: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00035078483715391995, 'time_algorithm_update': 0.017677361505073413, 'temp_loss': 0.04295301902494584, 'temp': 0.9191004693159583, 'alpha_loss': 13.796479601609079, 'alpha': 0.654839218185659, 'critic_loss': 660.8487481011284, 'actor_loss': 84.09659038789091, 'time_step': 0.018125893079746536, 'td_error': 176.7455103086777, 'init_value': -138.02035522460938, 'ave_value': -75.03330595496203} step=5472
2022-04-20 19:39.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.38 [info     ] CQL_20220420193747: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003493836051539371, 'time_algorithm_update': 0.01767102389307747, 'temp_loss': -0.043926784921197864, 'temp': 0.9190623230404324, 'alpha_loss': 14.32626868130868, 'alpha': 0.6310275714648398, 'critic_loss': 714.6332299304985, 'actor_loss': 87.98371592739173, 'time_step': 0.01811743828288296, 'td_error': 125.93304405600934, 'init_value': -139.74351501464844, 'ave_value': -75.17849463590093} step=5814
2022-04-20 19:39.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.45 [info     ] CQL_20220420193747: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003499782573409945, 'time_algorithm_update': 0.017692547095449346, 'temp_loss': -0.06550207928541982, 'temp': 0.920434967641942, 'alpha_loss': 12.1296803895493, 'alpha': 0.6088949889816039, 'critic_loss': 769.7454573424936, 'actor_loss': 91.04363520661293, 'time_step': 0.018139929102178206, 'td_error': 84.79236528289204, 'init_value': -152.8317108154297, 'ave_value': -81.25894120623467} step=6156
2022-04-20 19:39.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.51 [info     ] CQL_20220420193747: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035020273331313106, 'time_algorithm_update': 0.017607297116552876, 'temp_loss': -0.0408002018536392, 'temp': 0.9212615254678225, 'alpha_loss': 9.762328191110266, 'alpha': 0.5925572549390514, 'critic_loss': 796.247404399671, 'actor_loss': 92.6838276065581, 'time_step': 0.018055533107958342, 'td_error': 93.0923047759669, 'init_value': -154.87069702148438, 'ave_value': -82.3665999073797} step=6498
2022-04-20 19:39.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:39.57 [info     ] CQL_20220420193747: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00034769515545047516, 'time_algorithm_update': 0.017637752650076884, 'temp_loss': -0.08029156427133327, 'temp': 0.9223553128758369, 'alpha_loss': 9.503038590414482, 'alpha': 0.5769000123118797, 'critic_loss': 812.4286234894691, 'actor_loss': 94.2155334963436, 'time_step': 0.01808445007480376, 'td_error': 88.85914109086512, 'init_value': -156.95436096191406, 'ave_value': -83.4128667572087} step=6840
2022-04-20 19:39.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.04 [info     ] CQL_20220420193747: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035121287518774557, 'time_algorithm_update': 0.017428028653239645, 'temp_loss': -0.15427492467457787, 'temp': 0.9250235703953525, 'alpha_loss': 9.457598517512718, 'alpha': 0.5609536111703393, 'critic_loss': 821.9625658180281, 'actor_loss': 95.47323117618673, 'time_step': 0.017880108621385362, 'td_error': 83.99084960112714, 'init_value': -156.59266662597656, 'ave_value': -83.15347937516499} step=7182
2022-04-20 19:40.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.10 [info     ] CQL_20220420193747: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003505756980494449, 'time_algorithm_update': 0.01745337700983237, 'temp_loss': -0.14656834453562198, 'temp': 0.9284973189844723, 'alpha_loss': 9.457060501589412, 'alpha': 0.5446304860170822, 'critic_loss': 837.0165965654696, 'actor_loss': 97.21783781888192, 'time_step': 0.017901972023367185, 'td_error': 97.93827693068091, 'init_value': -161.32241821289062, 'ave_value': -85.3246340177329} step=7524
2022-04-20 19:40.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.17 [info     ] CQL_20220420193747: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00034894162451314646, 'time_algorithm_update': 0.017648572113081726, 'temp_loss': -0.1520207455185683, 'temp': 0.9325973492616798, 'alpha_loss': 9.1263326455278, 'alpha': 0.5284581144302212, 'critic_loss': 854.4309581734284, 'actor_loss': 98.6130166304739, 'time_step': 0.018095754043400636, 'td_error': 95.58428181998183, 'init_value': -162.15481567382812, 'ave_value': -87.01379114794503} step=7866
2022-04-20 19:40.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.23 [info     ] CQL_20220420193747: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035005424454895377, 'time_algorithm_update': 0.01773827466351247, 'temp_loss': -0.1666341519299132, 'temp': 0.9363010109168047, 'alpha_loss': 9.213712946016189, 'alpha': 0.5125320625235463, 'critic_loss': 870.5046920330204, 'actor_loss': 100.04276340328462, 'time_step': 0.018186602676123903, 'td_error': 117.90215159092224, 'init_value': -165.0388641357422, 'ave_value': -88.90102089712548} step=8208
2022-04-20 19:40.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.29 [info     ] CQL_20220420193747: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00034798446454499895, 'time_algorithm_update': 0.017636823375322665, 'temp_loss': -0.19453952578577208, 'temp': 0.9416205171953168, 'alpha_loss': 8.969474439732512, 'alpha': 0.4967604874693162, 'critic_loss': 883.0175893683182, 'actor_loss': 101.32722221062197, 'time_step': 0.01808062213206152, 'td_error': 91.40531059453126, 'init_value': -162.396240234375, 'ave_value': -87.61041534174872} step=8550
2022-04-20 19:40.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.36 [info     ] CQL_20220420193747: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035193231370713975, 'time_algorithm_update': 0.0177005132039388, 'temp_loss': -0.18370083214072455, 'temp': 0.9464940665758144, 'alpha_loss': 8.837641884708962, 'alpha': 0.48136695104035715, 'critic_loss': 889.6265494363349, 'actor_loss': 102.04144911738167, 'time_step': 0.018149550895244754, 'td_error': 113.38513090319802, 'init_value': -166.17457580566406, 'ave_value': -92.12752511275647} step=8892
2022-04-20 19:40.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.42 [info     ] CQL_20220420193747: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003578342192354258, 'time_algorithm_update': 0.017858158775240357, 'temp_loss': -0.15564518847246792, 'temp': 0.9510871754055135, 'alpha_loss': 9.117415652637593, 'alpha': 0.4660194038647657, 'critic_loss': 901.1600677311769, 'actor_loss': 103.27825802808617, 'time_step': 0.018315216951202928, 'td_error': 110.10424847042457, 'init_value': -167.7885284423828, 'ave_value': -91.24255126644294} step=9234
2022-04-20 19:40.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.49 [info     ] CQL_20220420193747: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035360054663050245, 'time_algorithm_update': 0.01801211304134793, 'temp_loss': -0.21568387752257243, 'temp': 0.9562096320397673, 'alpha_loss': 8.782777628703425, 'alpha': 0.4512083131318901, 'critic_loss': 908.7545099983438, 'actor_loss': 104.21196728979635, 'time_step': 0.018464841340717516, 'td_error': 114.20002043415872, 'init_value': -165.48704528808594, 'ave_value': -89.99071593983686} step=9576
2022-04-20 19:40.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:40.55 [info     ] CQL_20220420193747: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034621445059079175, 'time_algorithm_update': 0.017940744321945815, 'temp_loss': -0.1466458515530964, 'temp': 0.961136280618913, 'alpha_loss': 8.799463810279356, 'alpha': 0.43669209555227156, 'critic_loss': 920.2155169213725, 'actor_loss': 105.22107167829547, 'time_step': 0.018384929288897598, 'td_error': 104.22898897471116, 'init_value': -169.49009704589844, 'ave_value': -92.90176160671622} step=9918
2022-04-20 19:40.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.02 [info     ] CQL_20220420193747: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00034924696760568004, 'time_algorithm_update': 0.017968189646626078, 'temp_loss': -0.16787980184264, 'temp': 0.9659451610512204, 'alpha_loss': 8.559708384742514, 'alpha': 0.42262170346159683, 'critic_loss': 929.8355587964867, 'actor_loss': 106.07024829708345, 'time_step': 0.018413961979380826, 'td_error': 116.484167875617, 'init_value': -169.9708251953125, 'ave_value': -92.9408926123186} step=10260
2022-04-20 19:41.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.08 [info     ] CQL_20220420193747: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035230179279171234, 'time_algorithm_update': 0.017973481563099643, 'temp_loss': -0.18176658157455294, 'temp': 0.9704992523667408, 'alpha_loss': 8.462988306904396, 'alpha': 0.40929636498640853, 'critic_loss': 938.3611758940401, 'actor_loss': 106.90037688316657, 'time_step': 0.018425527371858295, 'td_error': 122.09968311969212, 'init_value': -172.17919921875, 'ave_value': -95.47079499737436} step=10602
2022-04-20 19:41.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.15 [info     ] CQL_20220420193747: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00034930552655493307, 'time_algorithm_update': 0.01785349218468917, 'temp_loss': -0.13360486216625275, 'temp': 0.9751461488113069, 'alpha_loss': 8.703635886398672, 'alpha': 0.39597600907610175, 'critic_loss': 951.8138779311153, 'actor_loss': 108.0610924436335, 'time_step': 0.01830117326033743, 'td_error': 144.88946418479205, 'init_value': -173.3787078857422, 'ave_value': -96.60091036875505} step=10944
2022-04-20 19:41.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.21 [info     ] CQL_20220420193747: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00034836091493305405, 'time_algorithm_update': 0.017617673204656233, 'temp_loss': -0.10814870111862121, 'temp': 0.9786883582148636, 'alpha_loss': 8.277078486325449, 'alpha': 0.3831980712929664, 'critic_loss': 968.9858833893, 'actor_loss': 108.9996635481628, 'time_step': 0.018062189308523435, 'td_error': 165.3397180619061, 'init_value': -171.20425415039062, 'ave_value': -95.65971915656247} step=11286
2022-04-20 19:41.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.28 [info     ] CQL_20220420193747: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00034686557033605743, 'time_algorithm_update': 0.017935199347155834, 'temp_loss': -0.012517931626031273, 'temp': 0.9798196864406965, 'alpha_loss': 8.1321366488585, 'alpha': 0.3710073543745175, 'critic_loss': 977.2360930861088, 'actor_loss': 109.76441643252011, 'time_step': 0.018379334120722544, 'td_error': 139.39812437482823, 'init_value': -176.78248596191406, 'ave_value': -97.61406482725799} step=11628
2022-04-20 19:41.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.34 [info     ] CQL_20220420193747: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00035523950007923863, 'time_algorithm_update': 0.01789777181301898, 'temp_loss': -0.07609703107764236, 'temp': 0.9816224867837471, 'alpha_loss': 8.229362271682561, 'alpha': 0.3591223195282339, 'critic_loss': 984.6453116790593, 'actor_loss': 110.31442294204444, 'time_step': 0.018350336983887074, 'td_error': 192.3365705643714, 'init_value': -179.33041381835938, 'ave_value': -99.73378513664947} step=11970
2022-04-20 19:41.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.41 [info     ] CQL_20220420193747: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003496847654643812, 'time_algorithm_update': 0.017994157054968048, 'temp_loss': -0.06403611611291679, 'temp': 0.9838218343885321, 'alpha_loss': 8.057947334490324, 'alpha': 0.34752247556608323, 'critic_loss': 991.0092543217174, 'actor_loss': 110.92815084624709, 'time_step': 0.01843907540304619, 'td_error': 158.26792381178214, 'init_value': -176.47164916992188, 'ave_value': -98.32473368529868} step=12312
2022-04-20 19:41.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.48 [info     ] CQL_20220420193747: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00034674496678581017, 'time_algorithm_update': 0.017964281533893785, 'temp_loss': -0.027587638334616235, 'temp': 0.9849819173241219, 'alpha_loss': 7.866565350203486, 'alpha': 0.33639765909889285, 'critic_loss': 1000.5332170452988, 'actor_loss': 111.82482658073916, 'time_step': 0.01840965371382864, 'td_error': 177.93156417343826, 'init_value': -177.31619262695312, 'ave_value': -99.57147300224717} step=12654
2022-04-20 19:41.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:41.54 [info     ] CQL_20220420193747: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003535022512513992, 'time_algorithm_update': 0.01774557361825865, 'temp_loss': -0.040787149307846327, 'temp': 0.9862240270564431, 'alpha_loss': 7.913577471560205, 'alpha': 0.32562879337901957, 'critic_loss': 1010.906789500811, 'actor_loss': 112.47555450528685, 'time_step': 0.018196792630424275, 'td_error': 201.99555020789683, 'init_value': -179.48336791992188, 'ave_value': -99.9244045154502} step=12996
2022-04-20 19:41.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.01 [info     ] CQL_20220420193747: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003549738934165553, 'time_algorithm_update': 0.01791152340626856, 'temp_loss': -0.019395017342870694, 'temp': 0.9869865259231879, 'alpha_loss': 7.7851686895939345, 'alpha': 0.3151704787510877, 'critic_loss': 1016.6938676443714, 'actor_loss': 112.89289791263334, 'time_step': 0.018362603689494886, 'td_error': 149.03782178516, 'init_value': -180.78952026367188, 'ave_value': -102.12886878882173} step=13338
2022-04-20 19:42.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.07 [info     ] CQL_20220420193747: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003489032823439927, 'time_algorithm_update': 0.017833151315387926, 'temp_loss': -0.02480157434243208, 'temp': 0.9876824310648511, 'alpha_loss': 7.670160142998946, 'alpha': 0.30508986272309957, 'critic_loss': 1024.4367798922356, 'actor_loss': 113.4199820178294, 'time_step': 0.018282449036313778, 'td_error': 220.05096877667467, 'init_value': -180.8597869873047, 'ave_value': -101.0782606544547} step=13680
2022-04-20 19:42.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.14 [info     ] CQL_20220420193747: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003428333684017784, 'time_algorithm_update': 0.017907267425492493, 'temp_loss': -0.009066352751432803, 'temp': 0.9880364545953204, 'alpha_loss': 7.461883140586273, 'alpha': 0.29532169964578414, 'critic_loss': 1031.744467930487, 'actor_loss': 113.95733078181395, 'time_step': 0.018348979671099032, 'td_error': 212.57501060770022, 'init_value': -182.8389434814453, 'ave_value': -102.91068271981703} step=14022
2022-04-20 19:42.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.20 [info     ] CQL_20220420193747: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003478722265589307, 'time_algorithm_update': 0.01791432099035609, 'temp_loss': 0.02862473164429093, 'temp': 0.9878524141004908, 'alpha_loss': 7.4084872223480405, 'alpha': 0.2860131821437189, 'critic_loss': 1036.4975626984535, 'actor_loss': 114.2090718899554, 'time_step': 0.018361997186091907, 'td_error': 171.31835652782195, 'init_value': -182.25161743164062, 'ave_value': -103.12277886644945} step=14364
2022-04-20 19:42.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.26 [info     ] CQL_20220420193747: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035020412757382754, 'time_algorithm_update': 0.017706974905136733, 'temp_loss': 0.049308322054165146, 'temp': 0.9865137591348057, 'alpha_loss': 7.07141949977094, 'alpha': 0.2769156554114749, 'critic_loss': 1043.759726541084, 'actor_loss': 114.7021363687794, 'time_step': 0.018153995798345198, 'td_error': 206.79661235562799, 'init_value': -179.26841735839844, 'ave_value': -101.75839306696511} step=14706
2022-04-20 19:42.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.33 [info     ] CQL_20220420193747: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003493724510683651, 'time_algorithm_update': 0.01782390597270943, 'temp_loss': 0.04497718996210405, 'temp': 0.985587766818833, 'alpha_loss': 6.6799470862449954, 'alpha': 0.2685121803255806, 'critic_loss': 1049.8950564735815, 'actor_loss': 114.93821328146416, 'time_step': 0.018269866530658208, 'td_error': 230.30682189869253, 'init_value': -180.5416717529297, 'ave_value': -101.80255885139375} step=15048
2022-04-20 19:42.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.39 [info     ] CQL_20220420193747: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003514394425509269, 'time_algorithm_update': 0.017718594673781368, 'temp_loss': 0.07258985910499305, 'temp': 0.9831810476835708, 'alpha_loss': 6.507287978428846, 'alpha': 0.2604881805634638, 'critic_loss': 1055.732991357993, 'actor_loss': 115.06362678572448, 'time_step': 0.018165578619081375, 'td_error': 210.5289917987904, 'init_value': -184.1454620361328, 'ave_value': -102.1809174105223} step=15390
2022-04-20 19:42.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.46 [info     ] CQL_20220420193747: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.000353894735637464, 'time_algorithm_update': 0.017722738416571366, 'temp_loss': 0.09721035530997647, 'temp': 0.9806450237656197, 'alpha_loss': 6.173411954216093, 'alpha': 0.2526350271893524, 'critic_loss': 1060.0073368897913, 'actor_loss': 115.34768433598747, 'time_step': 0.018173323737250432, 'td_error': 222.92365462704737, 'init_value': -181.83358764648438, 'ave_value': -102.94452719129421} step=15732
2022-04-20 19:42.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.52 [info     ] CQL_20220420193747: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.000351852143717091, 'time_algorithm_update': 0.017755280461227686, 'temp_loss': 0.16474209523253275, 'temp': 0.976630412521418, 'alpha_loss': 5.853604880689877, 'alpha': 0.2452773650487264, 'critic_loss': 1067.294340791758, 'actor_loss': 115.50809010288171, 'time_step': 0.018205701956274915, 'td_error': 186.62306610070928, 'init_value': -186.40774536132812, 'ave_value': -105.41060178615052} step=16074
2022-04-20 19:42.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:42.59 [info     ] CQL_20220420193747: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003574033926802072, 'time_algorithm_update': 0.01771548268390678, 'temp_loss': 0.18302621399406452, 'temp': 0.9710206272657852, 'alpha_loss': 5.385284017401132, 'alpha': 0.23820992707334765, 'critic_loss': 1073.432620578342, 'actor_loss': 115.80028725785819, 'time_step': 0.018169860393680327, 'td_error': 181.45465736121406, 'init_value': -185.24337768554688, 'ave_value': -104.79864536171293} step=16416
2022-04-20 19:42.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:43.05 [info     ] CQL_20220420193747: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003490531653688665, 'time_algorithm_update': 0.01771261678104512, 'temp_loss': 0.13078270408145168, 'temp': 0.96666425030831, 'alpha_loss': 5.064287157783731, 'alpha': 0.23148331181173437, 'critic_loss': 1075.9733210335, 'actor_loss': 115.90324620475546, 'time_step': 0.01815972760406851, 'td_error': 123.27651084609344, 'init_value': -187.4219970703125, 'ave_value': -105.73327777722868} step=16758
2022-04-20 19:43.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:43.12 [info     ] CQL_20220420193747: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003490838391041895, 'time_algorithm_update': 0.017522135673210634, 'temp_loss': 0.20847982135519647, 'temp': 0.9618889342623147, 'alpha_loss': 4.631279191775628, 'alpha': 0.22526731568644617, 'critic_loss': 1082.5604649593956, 'actor_loss': 116.0224522819296, 'time_step': 0.01796772605494449, 'td_error': 180.00830569966922, 'init_value': -185.05227661132812, 'ave_value': -105.82126556111751} step=17100
2022-04-20 19:43.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420193747/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:43.12 [info     ] FQE_20220420194312: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00013597040291292122, 'time_algorithm_update': 0.0019048466739884343, 'loss': 0.0040972131691558895, 'time_step': 0.0021015477467732258, 'init_value': -0.33635181188583374, 'ave_value': -0.3254792407692016, 'soft_opc': nan} step=166




2022-04-20 19:43.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.13 [info     ] FQE_20220420194312: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00013093201510877493, 'time_algorithm_update': 0.0018700131450790957, 'loss': 0.0031898590449961914, 'time_step': 0.002058067953730204, 'init_value': -0.3954598009586334, 'ave_value': -0.3598394253836559, 'soft_opc': nan} step=332




2022-04-20 19:43.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.13 [info     ] FQE_20220420194312: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013237832540489105, 'time_algorithm_update': 0.0018399781491383012, 'loss': 0.0029710618931945995, 'time_step': 0.002032445137759289, 'init_value': -0.4582177400588989, 'ave_value': -0.40870573236397256, 'soft_opc': nan} step=498




2022-04-20 19:43.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.13 [info     ] FQE_20220420194312: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00013032447860901616, 'time_algorithm_update': 0.001886699573103204, 'loss': 0.0029859408436073774, 'time_step': 0.002075568739190159, 'init_value': -0.5344929099082947, 'ave_value': -0.4634458238913401, 'soft_opc': nan} step=664




2022-04-20 19:43.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.14 [info     ] FQE_20220420194312: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001341090144881283, 'time_algorithm_update': 0.0018803541918835007, 'loss': 0.0029885968956423095, 'time_step': 0.0020778653133346372, 'init_value': -0.6254530549049377, 'ave_value': -0.5166799857943981, 'soft_opc': nan} step=830




2022-04-20 19:43.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.14 [info     ] FQE_20220420194312: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00011793676629123918, 'time_algorithm_update': 0.0016880624265555877, 'loss': 0.0029852318104794137, 'time_step': 0.0018600426524518483, 'init_value': -0.6631813645362854, 'ave_value': -0.5373849933480357, 'soft_opc': nan} step=996




2022-04-20 19:43.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.14 [info     ] FQE_20220420194312: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00013099377413830124, 'time_algorithm_update': 0.0018472944397524178, 'loss': 0.0030769055200101114, 'time_step': 0.0020390964416136226, 'init_value': -0.7600734233856201, 'ave_value': -0.6016265684151435, 'soft_opc': nan} step=1162




2022-04-20 19:43.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.15 [info     ] FQE_20220420194312: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001353499401046569, 'time_algorithm_update': 0.0019368550863610693, 'loss': 0.0031593693651718996, 'time_step': 0.00213562005973724, 'init_value': -0.8401229381561279, 'ave_value': -0.6546914027617858, 'soft_opc': nan} step=1328




2022-04-20 19:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.15 [info     ] FQE_20220420194312: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013001137469188277, 'time_algorithm_update': 0.0018519177494278874, 'loss': 0.003170498484796115, 'time_step': 0.002042238970836961, 'init_value': -0.875480592250824, 'ave_value': -0.6862919954230656, 'soft_opc': nan} step=1494




2022-04-20 19:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.16 [info     ] FQE_20220420194312: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00013573342059032027, 'time_algorithm_update': 0.0018866536128951843, 'loss': 0.0033476895169517675, 'time_step': 0.002084271017327366, 'init_value': -0.9969427585601807, 'ave_value': -0.7537131781714994, 'soft_opc': nan} step=1660




2022-04-20 19:43.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.16 [info     ] FQE_20220420194312: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001293234078280897, 'time_algorithm_update': 0.0017956409109644144, 'loss': 0.0036905329465495803, 'time_step': 0.0019837172634630316, 'init_value': -1.0704467296600342, 'ave_value': -0.7911202317623942, 'soft_opc': nan} step=1826




2022-04-20 19:43.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.16 [info     ] FQE_20220420194312: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001356544264827866, 'time_algorithm_update': 0.0018449203077569064, 'loss': 0.0037927862869699047, 'time_step': 0.0020443876105618766, 'init_value': -1.1190462112426758, 'ave_value': -0.796671821754258, 'soft_opc': nan} step=1992




2022-04-20 19:43.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.17 [info     ] FQE_20220420194312: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014595956687467643, 'time_algorithm_update': 0.0019943484340805605, 'loss': 0.004118217890296715, 'time_step': 0.002208481352013278, 'init_value': -1.2255313396453857, 'ave_value': -0.8425333778130578, 'soft_opc': nan} step=2158




2022-04-20 19:43.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.17 [info     ] FQE_20220420194312: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014331398240054947, 'time_algorithm_update': 0.002060442085725715, 'loss': 0.004506131995985487, 'time_step': 0.0022693757551262178, 'init_value': -1.300802230834961, 'ave_value': -0.8637385659322545, 'soft_opc': nan} step=2324




2022-04-20 19:43.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.18 [info     ] FQE_20220420194312: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014614197145025414, 'time_algorithm_update': 0.0020660621574126094, 'loss': 0.004895307862126908, 'time_step': 0.00228208088013063, 'init_value': -1.3872671127319336, 'ave_value': -0.9125033920681155, 'soft_opc': nan} step=2490




2022-04-20 19:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.18 [info     ] FQE_20220420194312: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001473182655242552, 'time_algorithm_update': 0.002062903829367764, 'loss': 0.004994169335548642, 'time_step': 0.0022783466132290393, 'init_value': -1.519996166229248, 'ave_value': -1.0000478073417602, 'soft_opc': nan} step=2656




2022-04-20 19:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.18 [info     ] FQE_20220420194312: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014704106801963714, 'time_algorithm_update': 0.001990736248981522, 'loss': 0.005523477220930249, 'time_step': 0.0022043233894440063, 'init_value': -1.64031183719635, 'ave_value': -1.064258287216092, 'soft_opc': nan} step=2822




2022-04-20 19:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.19 [info     ] FQE_20220420194312: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.000144399792315012, 'time_algorithm_update': 0.002072169120053211, 'loss': 0.005518171217794676, 'time_step': 0.002283779971570854, 'init_value': -1.7070512771606445, 'ave_value': -1.0864847536156843, 'soft_opc': nan} step=2988




2022-04-20 19:43.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.19 [info     ] FQE_20220420194312: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.000145080577896302, 'time_algorithm_update': 0.0020483014095260435, 'loss': 0.006316468306867608, 'time_step': 0.0022639438330409037, 'init_value': -1.811194658279419, 'ave_value': -1.1413301919763152, 'soft_opc': nan} step=3154




2022-04-20 19:43.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.20 [info     ] FQE_20220420194312: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001475954630288733, 'time_algorithm_update': 0.00206089881529291, 'loss': 0.0065675204443038405, 'time_step': 0.0022766360317368105, 'init_value': -1.9363014698028564, 'ave_value': -1.2042253624077315, 'soft_opc': nan} step=3320




2022-04-20 19:43.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.20 [info     ] FQE_20220420194312: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.000144836414291198, 'time_algorithm_update': 0.0020072431449430534, 'loss': 0.007039577603769343, 'time_step': 0.0022207441100155017, 'init_value': -1.9873619079589844, 'ave_value': -1.2183955659871704, 'soft_opc': nan} step=3486




2022-04-20 19:43.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.21 [info     ] FQE_20220420194312: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000145202659698854, 'time_algorithm_update': 0.0020563315196209645, 'loss': 0.007482067442005388, 'time_step': 0.0022670949798032462, 'init_value': -2.0964200496673584, 'ave_value': -1.2550891999457334, 'soft_opc': nan} step=3652




2022-04-20 19:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.21 [info     ] FQE_20220420194312: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014546693089496657, 'time_algorithm_update': 0.002034872411245323, 'loss': 0.0077699137808115454, 'time_step': 0.0022463597447039134, 'init_value': -2.2201266288757324, 'ave_value': -1.306370744860924, 'soft_opc': nan} step=3818




2022-04-20 19:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.21 [info     ] FQE_20220420194312: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014535203037491763, 'time_algorithm_update': 0.002002161669443889, 'loss': 0.008353491471547365, 'time_step': 0.0022131894008222833, 'init_value': -2.27999210357666, 'ave_value': -1.3234863670022639, 'soft_opc': nan} step=3984




2022-04-20 19:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.22 [info     ] FQE_20220420194312: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014761557061988186, 'time_algorithm_update': 0.0020343208887490883, 'loss': 0.009057612293007144, 'time_step': 0.0022520616830113424, 'init_value': -2.367360830307007, 'ave_value': -1.3545015083374203, 'soft_opc': nan} step=4150




2022-04-20 19:43.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.22 [info     ] FQE_20220420194312: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001449211534247341, 'time_algorithm_update': 0.002009131822241358, 'loss': 0.009181679537833994, 'time_step': 0.0022204841475888908, 'init_value': -2.442924976348877, 'ave_value': -1.3627524146625587, 'soft_opc': nan} step=4316




2022-04-20 19:43.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.23 [info     ] FQE_20220420194312: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014682419328804477, 'time_algorithm_update': 0.0020416716495192193, 'loss': 0.009784592426359564, 'time_step': 0.0022593162145959325, 'init_value': -2.556894063949585, 'ave_value': -1.4247169867023692, 'soft_opc': nan} step=4482




2022-04-20 19:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.23 [info     ] FQE_20220420194312: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014792005699801157, 'time_algorithm_update': 0.0020909926977502294, 'loss': 0.00989771907565644, 'time_step': 0.0023084146430693477, 'init_value': -2.670536994934082, 'ave_value': -1.4937749120297732, 'soft_opc': nan} step=4648




2022-04-20 19:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.23 [info     ] FQE_20220420194312: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001460888299597315, 'time_algorithm_update': 0.002086296138993229, 'loss': 0.010704696111111757, 'time_step': 0.002300424748156444, 'init_value': -2.7633466720581055, 'ave_value': -1.549903044816073, 'soft_opc': nan} step=4814




2022-04-20 19:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.24 [info     ] FQE_20220420194312: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014463533838111233, 'time_algorithm_update': 0.002018234815942236, 'loss': 0.011443444625867239, 'time_step': 0.002229804016021361, 'init_value': -2.849987745285034, 'ave_value': -1.5533070040044483, 'soft_opc': nan} step=4980




2022-04-20 19:43.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.24 [info     ] FQE_20220420194312: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014592653297516237, 'time_algorithm_update': 0.002047877713858363, 'loss': 0.011920053753805492, 'time_step': 0.0022611919655857317, 'init_value': -2.9709229469299316, 'ave_value': -1.6034755128595206, 'soft_opc': nan} step=5146




2022-04-20 19:43.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.25 [info     ] FQE_20220420194312: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001459179154361587, 'time_algorithm_update': 0.002008718180369182, 'loss': 0.012082911106331432, 'time_step': 0.0022232676126870765, 'init_value': -2.981980562210083, 'ave_value': -1.5850500879926726, 'soft_opc': nan} step=5312




2022-04-20 19:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.25 [info     ] FQE_20220420194312: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014676387051501907, 'time_algorithm_update': 0.0020405829670917556, 'loss': 0.012534189413172608, 'time_step': 0.002255926649254489, 'init_value': -3.0357375144958496, 'ave_value': -1.6087434341912872, 'soft_opc': nan} step=5478




2022-04-20 19:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.26 [info     ] FQE_20220420194312: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014875883079436888, 'time_algorithm_update': 0.0020334203559232047, 'loss': 0.013174418285657394, 'time_step': 0.002257199172514031, 'init_value': -3.18034029006958, 'ave_value': -1.7217342679833507, 'soft_opc': nan} step=5644




2022-04-20 19:43.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.26 [info     ] FQE_20220420194312: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.000143989022955837, 'time_algorithm_update': 0.0020069573298994317, 'loss': 0.013376706124126283, 'time_step': 0.0022160719676190114, 'init_value': -3.2486045360565186, 'ave_value': -1.723366866146659, 'soft_opc': nan} step=5810




2022-04-20 19:43.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.26 [info     ] FQE_20220420194312: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001425671290202313, 'time_algorithm_update': 0.0020519365747290923, 'loss': 0.014572835601697934, 'time_step': 0.0022607625248920486, 'init_value': -3.2889225482940674, 'ave_value': -1.7222840839812348, 'soft_opc': nan} step=5976




2022-04-20 19:43.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.27 [info     ] FQE_20220420194312: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014688595231757107, 'time_algorithm_update': 0.002094212784824601, 'loss': 0.015089940515358046, 'time_step': 0.0023132447736809053, 'init_value': -3.3590004444122314, 'ave_value': -1.7228131548778431, 'soft_opc': nan} step=6142




2022-04-20 19:43.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.27 [info     ] FQE_20220420194312: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014676099800201784, 'time_algorithm_update': 0.0020245557808014282, 'loss': 0.015110608069674975, 'time_step': 0.0022429400179759564, 'init_value': -3.4238085746765137, 'ave_value': -1.750819746425023, 'soft_opc': nan} step=6308




2022-04-20 19:43.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.28 [info     ] FQE_20220420194312: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014860658760530403, 'time_algorithm_update': 0.002061750515397773, 'loss': 0.01575286295587556, 'time_step': 0.0022787200399191983, 'init_value': -3.497668743133545, 'ave_value': -1.7923641183757568, 'soft_opc': nan} step=6474




2022-04-20 19:43.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.28 [info     ] FQE_20220420194312: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014481630670018942, 'time_algorithm_update': 0.002051313239407827, 'loss': 0.015177852054076067, 'time_step': 0.002263195543404085, 'init_value': -3.6341891288757324, 'ave_value': -1.8818880815495242, 'soft_opc': nan} step=6640




2022-04-20 19:43.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.29 [info     ] FQE_20220420194312: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001450575977922922, 'time_algorithm_update': 0.002046520451465285, 'loss': 0.016984200154161865, 'time_step': 0.002260284251477345, 'init_value': -3.712347984313965, 'ave_value': -1.9318322760699032, 'soft_opc': nan} step=6806




2022-04-20 19:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.29 [info     ] FQE_20220420194312: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001470597393541451, 'time_algorithm_update': 0.002034476004451154, 'loss': 0.017762916053909, 'time_step': 0.0022504631295261614, 'init_value': -3.842698574066162, 'ave_value': -2.0088767372407355, 'soft_opc': nan} step=6972




2022-04-20 19:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.29 [info     ] FQE_20220420194312: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014282565519034145, 'time_algorithm_update': 0.002011080822312688, 'loss': 0.018274204832142645, 'time_step': 0.002220823104123035, 'init_value': -3.891209125518799, 'ave_value': -2.025179063602611, 'soft_opc': nan} step=7138




2022-04-20 19:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.30 [info     ] FQE_20220420194312: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014411684978439147, 'time_algorithm_update': 0.002036445112113493, 'loss': 0.018533419175999218, 'time_step': 0.0022468724882746317, 'init_value': -3.9523096084594727, 'ave_value': -2.049980159063597, 'soft_opc': nan} step=7304




2022-04-20 19:43.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.30 [info     ] FQE_20220420194312: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015076671738222422, 'time_algorithm_update': 0.0020758789705942913, 'loss': 0.019263581514459508, 'time_step': 0.0022996161357465997, 'init_value': -3.9879815578460693, 'ave_value': -2.0204846179297378, 'soft_opc': nan} step=7470




2022-04-20 19:43.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.31 [info     ] FQE_20220420194312: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014746619994381824, 'time_algorithm_update': 0.002075840191668775, 'loss': 0.020616735302960973, 'time_step': 0.0022906538951827818, 'init_value': -4.119106292724609, 'ave_value': -2.1112857485273935, 'soft_opc': nan} step=7636




2022-04-20 19:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.31 [info     ] FQE_20220420194312: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001492974269820983, 'time_algorithm_update': 0.0020916145968149944, 'loss': 0.019929044231974398, 'time_step': 0.0023095579032438346, 'init_value': -4.120571136474609, 'ave_value': -2.106898161299057, 'soft_opc': nan} step=7802




2022-04-20 19:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.31 [info     ] FQE_20220420194312: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014724501644272403, 'time_algorithm_update': 0.002033980495958443, 'loss': 0.0204318188367894, 'time_step': 0.0022524221833929957, 'init_value': -4.202056884765625, 'ave_value': -2.147930888254363, 'soft_opc': nan} step=7968




2022-04-20 19:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.32 [info     ] FQE_20220420194312: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014595956687467643, 'time_algorithm_update': 0.0020652492362332633, 'loss': 0.021014024057504105, 'time_step': 0.002277484859328672, 'init_value': -4.305978775024414, 'ave_value': -2.227488650582932, 'soft_opc': nan} step=8134




2022-04-20 19:43.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:43.32 [info     ] FQE_20220420194312: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014449314898755177, 'time_algorithm_update': 0.0020396063126713396, 'loss': 0.021773722918328256, 'time_step': 0.0022531403116433017, 'init_value': -4.337491035461426, 'ave_value': -2.2148355344662796, 'soft_opc': nan} step=8300




2022-04-20 19:43.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194312/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:43.33 [debug    ] RoundIterator is selected.
2022-04-20 19:43.33 [info     ] Directory is created at d3rlpy_logs/FQE_20220420194333
2022-04-20 19:43.33 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:43.33 [debug    ] Building models...
2022-04-20 19:43.33 [debug    ] Models have been built.
2022-04-20 19:43.33 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420194333/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:43.33 [info     ] FQE_20220420194333: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001509660898252975, 'time_algorithm_update': 0.0019750927769860557, 'loss': 0.02232062009293153, 'time_step': 0.0021944025228189867, 'init_value': -1.0827889442443848, 'ave_value': -1.1117576075701026, 'soft_opc': nan} step=344




2022-04-20 19:43.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.34 [info     ] FQE_20220420194333: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015087529670360477, 'time_algorithm_update': 0.0020318308541941088, 'loss': 0.020298060322137074, 'time_step': 0.002249964447908623, 'init_value': -1.8948454856872559, 'ave_value': -1.9382427352640006, 'soft_opc': nan} step=688




2022-04-20 19:43.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.35 [info     ] FQE_20220420194333: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001527992791907732, 'time_algorithm_update': 0.0020175326702206635, 'loss': 0.02277470991925098, 'time_step': 0.002241524846054787, 'init_value': -2.847951889038086, 'ave_value': -2.898148963513138, 'soft_opc': nan} step=1032




2022-04-20 19:43.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.36 [info     ] FQE_20220420194333: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015297462773877522, 'time_algorithm_update': 0.002051875341770261, 'loss': 0.024615834467113018, 'time_step': 0.0022764670294384624, 'init_value': -3.573338270187378, 'ave_value': -3.6704619099696476, 'soft_opc': nan} step=1376




2022-04-20 19:43.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.37 [info     ] FQE_20220420194333: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015198491340459778, 'time_algorithm_update': 0.0020564350970955782, 'loss': 0.03012717832529614, 'time_step': 0.002275554246680681, 'init_value': -4.459548473358154, 'ave_value': -4.643254712170309, 'soft_opc': nan} step=1720




2022-04-20 19:43.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.38 [info     ] FQE_20220420194333: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015302314314731333, 'time_algorithm_update': 0.002031956301179043, 'loss': 0.03539858840752479, 'time_step': 0.0022545061832250552, 'init_value': -4.854406356811523, 'ave_value': -5.150828285139423, 'soft_opc': nan} step=2064




2022-04-20 19:43.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.39 [info     ] FQE_20220420194333: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015355542648670285, 'time_algorithm_update': 0.002036522294199744, 'loss': 0.04441769111381714, 'time_step': 0.0022616649782934853, 'init_value': -5.696543216705322, 'ave_value': -6.147351787648759, 'soft_opc': nan} step=2408




2022-04-20 19:43.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.39 [info     ] FQE_20220420194333: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015225937200147053, 'time_algorithm_update': 0.001999358105105023, 'loss': 0.05229472622361987, 'time_step': 0.0022202972755875697, 'init_value': -5.987150192260742, 'ave_value': -6.617154156356245, 'soft_opc': nan} step=2752




2022-04-20 19:43.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.40 [info     ] FQE_20220420194333: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015080598897712174, 'time_algorithm_update': 0.0020391227200973867, 'loss': 0.06032129255296706, 'time_step': 0.002259919809740643, 'init_value': -6.505919456481934, 'ave_value': -7.349013292816308, 'soft_opc': nan} step=3096




2022-04-20 19:43.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.41 [info     ] FQE_20220420194333: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015475375707759412, 'time_algorithm_update': 0.002031054607657499, 'loss': 0.0701439275844889, 'time_step': 0.002256411452626073, 'init_value': -7.160606861114502, 'ave_value': -8.228810740779112, 'soft_opc': nan} step=3440




2022-04-20 19:43.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.42 [info     ] FQE_20220420194333: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001527992791907732, 'time_algorithm_update': 0.0020612242609955546, 'loss': 0.07930447791735533, 'time_step': 0.0022827761117802108, 'init_value': -7.350167751312256, 'ave_value': -8.67969181314782, 'soft_opc': nan} step=3784




2022-04-20 19:43.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.43 [info     ] FQE_20220420194333: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015194748723229698, 'time_algorithm_update': 0.002001806747081668, 'loss': 0.0905701982555911, 'time_step': 0.0022243365298869997, 'init_value': -7.973982334136963, 'ave_value': -9.465293613532642, 'soft_opc': nan} step=4128




2022-04-20 19:43.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.44 [info     ] FQE_20220420194333: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.000154170879097872, 'time_algorithm_update': 0.002012279837630516, 'loss': 0.10376998790821364, 'time_step': 0.002237652623376181, 'init_value': -8.299784660339355, 'ave_value': -9.986504840206456, 'soft_opc': nan} step=4472




2022-04-20 19:43.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.44 [info     ] FQE_20220420194333: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015322898709496787, 'time_algorithm_update': 0.0020463487436605055, 'loss': 0.11722013082214479, 'time_step': 0.0022667722646580184, 'init_value': -8.885554313659668, 'ave_value': -10.801847828535346, 'soft_opc': nan} step=4816




2022-04-20 19:43.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.45 [info     ] FQE_20220420194333: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015444117923115575, 'time_algorithm_update': 0.002061548621155495, 'loss': 0.13442641888138687, 'time_step': 0.0022829909657323083, 'init_value': -9.224220275878906, 'ave_value': -11.331409401439869, 'soft_opc': nan} step=5160




2022-04-20 19:43.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.46 [info     ] FQE_20220420194333: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015297116235245105, 'time_algorithm_update': 0.0020402032275532566, 'loss': 0.1445690319862563, 'time_step': 0.00226301439972811, 'init_value': -9.568822860717773, 'ave_value': -11.934985325062597, 'soft_opc': nan} step=5504




2022-04-20 19:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.47 [info     ] FQE_20220420194333: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001501745955888615, 'time_algorithm_update': 0.0020024651704832566, 'loss': 0.15842688118302545, 'time_step': 0.0022218858086785606, 'init_value': -9.654826164245605, 'ave_value': -12.239924701095164, 'soft_opc': nan} step=5848




2022-04-20 19:43.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.48 [info     ] FQE_20220420194333: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015502197797908338, 'time_algorithm_update': 0.0020107793253521587, 'loss': 0.17527472335563668, 'time_step': 0.002234914968180102, 'init_value': -10.354286193847656, 'ave_value': -13.084032997945407, 'soft_opc': nan} step=6192




2022-04-20 19:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.49 [info     ] FQE_20220420194333: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001421037108399147, 'time_algorithm_update': 0.0018809784290402434, 'loss': 0.19684667081767043, 'time_step': 0.0020880574403807175, 'init_value': -11.110699653625488, 'ave_value': -14.12682508842365, 'soft_opc': nan} step=6536




2022-04-20 19:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.49 [info     ] FQE_20220420194333: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013399678607319676, 'time_algorithm_update': 0.00180600410283998, 'loss': 0.21689562681997412, 'time_step': 0.0020002182139906774, 'init_value': -11.055692672729492, 'ave_value': -14.454426292954265, 'soft_opc': nan} step=6880




2022-04-20 19:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.50 [info     ] FQE_20220420194333: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013733187387155932, 'time_algorithm_update': 0.0018675348093343336, 'loss': 0.22715204875634679, 'time_step': 0.0020684003829956055, 'init_value': -11.369889259338379, 'ave_value': -15.063721920019603, 'soft_opc': nan} step=7224




2022-04-20 19:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.51 [info     ] FQE_20220420194333: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013615294944408328, 'time_algorithm_update': 0.0018064844053845073, 'loss': 0.24192397777251032, 'time_step': 0.002005315104196238, 'init_value': -11.892236709594727, 'ave_value': -15.748831922366275, 'soft_opc': nan} step=7568




2022-04-20 19:43.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.52 [info     ] FQE_20220420194333: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001331006371697714, 'time_algorithm_update': 0.0018123104128726693, 'loss': 0.25585276727174777, 'time_step': 0.002008256524108177, 'init_value': -12.273588180541992, 'ave_value': -16.5107448295967, 'soft_opc': nan} step=7912




2022-04-20 19:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.52 [info     ] FQE_20220420194333: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012985911480216093, 'time_algorithm_update': 0.0017312453236690787, 'loss': 0.2739127449918712, 'time_step': 0.0019222913786422374, 'init_value': -12.644514083862305, 'ave_value': -17.281673707578097, 'soft_opc': nan} step=8256




2022-04-20 19:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.53 [info     ] FQE_20220420194333: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001399801221004752, 'time_algorithm_update': 0.0019210840380469033, 'loss': 0.28295674055901376, 'time_step': 0.0021245923153189726, 'init_value': -12.779508590698242, 'ave_value': -17.707309494015703, 'soft_opc': nan} step=8600




2022-04-20 19:43.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.54 [info     ] FQE_20220420194333: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013975972353025924, 'time_algorithm_update': 0.0018658936023712158, 'loss': 0.2956277875901135, 'time_step': 0.0020705309025076933, 'init_value': -13.097322463989258, 'ave_value': -18.345652305546242, 'soft_opc': nan} step=8944




2022-04-20 19:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.55 [info     ] FQE_20220420194333: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00014330273450807084, 'time_algorithm_update': 0.0019196140211681987, 'loss': 0.305199266849952, 'time_step': 0.0021272481873978017, 'init_value': -13.240195274353027, 'ave_value': -18.84946190703761, 'soft_opc': nan} step=9288




2022-04-20 19:43.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.56 [info     ] FQE_20220420194333: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015677407730457394, 'time_algorithm_update': 0.0020776737567990327, 'loss': 0.31293365016557967, 'time_step': 0.0023025523784548736, 'init_value': -13.412078857421875, 'ave_value': -19.42210324271624, 'soft_opc': nan} step=9632




2022-04-20 19:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.56 [info     ] FQE_20220420194333: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015304740085158238, 'time_algorithm_update': 0.002095929411954658, 'loss': 0.3284101556163541, 'time_step': 0.0023205044657685038, 'init_value': -13.747142791748047, 'ave_value': -19.8324571321173, 'soft_opc': nan} step=9976




2022-04-20 19:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.57 [info     ] FQE_20220420194333: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015539208123850268, 'time_algorithm_update': 0.002064751331196275, 'loss': 0.33651604776833816, 'time_step': 0.0022888433101565337, 'init_value': -13.942398071289062, 'ave_value': -20.48888019883653, 'soft_opc': nan} step=10320




2022-04-20 19:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.58 [info     ] FQE_20220420194333: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001553969327793565, 'time_algorithm_update': 0.0020641012247218644, 'loss': 0.34929159920402736, 'time_step': 0.0022892896519150843, 'init_value': -14.098390579223633, 'ave_value': -20.876109183854044, 'soft_opc': nan} step=10664




2022-04-20 19:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:43.59 [info     ] FQE_20220420194333: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015593822612318883, 'time_algorithm_update': 0.0021273868028507674, 'loss': 0.367243250875279, 'time_step': 0.0023541706939076267, 'init_value': -14.657793045043945, 'ave_value': -21.582296121284063, 'soft_opc': nan} step=11008




2022-04-20 19:43.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.00 [info     ] FQE_20220420194333: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000155721292939297, 'time_algorithm_update': 0.0021033564279245775, 'loss': 0.3807994952252091, 'time_step': 0.002331796080567116, 'init_value': -14.83142375946045, 'ave_value': -21.85102813971472, 'soft_opc': nan} step=11352




2022-04-20 19:44.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.01 [info     ] FQE_20220420194333: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015464078548342683, 'time_algorithm_update': 0.002058819282886594, 'loss': 0.39331973311599605, 'time_step': 0.002286476451297139, 'init_value': -15.314647674560547, 'ave_value': -22.446287698496704, 'soft_opc': nan} step=11696




2022-04-20 19:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.02 [info     ] FQE_20220420194333: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001565245694892351, 'time_algorithm_update': 0.0021211421766946484, 'loss': 0.41111382487944736, 'time_step': 0.00234658912170765, 'init_value': -15.584291458129883, 'ave_value': -22.75427553068494, 'soft_opc': nan} step=12040




2022-04-20 19:44.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.03 [info     ] FQE_20220420194333: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015593753304592398, 'time_algorithm_update': 0.0020846364110015158, 'loss': 0.418436978699986, 'time_step': 0.0023101990999177444, 'init_value': -15.641947746276855, 'ave_value': -22.85925784481874, 'soft_opc': nan} step=12384




2022-04-20 19:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.03 [info     ] FQE_20220420194333: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015357206034105877, 'time_algorithm_update': 0.0020460424035094503, 'loss': 0.4449423091150387, 'time_step': 0.002269762199978496, 'init_value': -16.40692138671875, 'ave_value': -23.719685875282934, 'soft_opc': nan} step=12728




2022-04-20 19:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.04 [info     ] FQE_20220420194333: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015506910723309185, 'time_algorithm_update': 0.002071347347525663, 'loss': 0.4596947683238013, 'time_step': 0.002299655315487884, 'init_value': -16.252840042114258, 'ave_value': -24.094455389444388, 'soft_opc': nan} step=13072




2022-04-20 19:44.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.05 [info     ] FQE_20220420194333: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015384859816972599, 'time_algorithm_update': 0.0020799872487090353, 'loss': 0.4759569553994076, 'time_step': 0.002306573612745418, 'init_value': -16.370975494384766, 'ave_value': -24.386919750501445, 'soft_opc': nan} step=13416




2022-04-20 19:44.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.06 [info     ] FQE_20220420194333: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001596191594767016, 'time_algorithm_update': 0.002094904350679974, 'loss': 0.48597608228365696, 'time_step': 0.002327111571334129, 'init_value': -16.40218734741211, 'ave_value': -24.696408323303437, 'soft_opc': nan} step=13760




2022-04-20 19:44.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.07 [info     ] FQE_20220420194333: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015510168186453886, 'time_algorithm_update': 0.002067015614620475, 'loss': 0.48373917013211826, 'time_step': 0.0022900402545928955, 'init_value': -15.983463287353516, 'ave_value': -24.485002726358402, 'soft_opc': nan} step=14104




2022-04-20 19:44.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.08 [info     ] FQE_20220420194333: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015380008276118787, 'time_algorithm_update': 0.0020361861517263014, 'loss': 0.4951937557414694, 'time_step': 0.0022588171238122983, 'init_value': -16.294357299804688, 'ave_value': -24.80060989349275, 'soft_opc': nan} step=14448




2022-04-20 19:44.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.09 [info     ] FQE_20220420194333: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015239590822264205, 'time_algorithm_update': 0.0020613011925719503, 'loss': 0.5099388543126542, 'time_step': 0.0022824226423751475, 'init_value': -16.56201934814453, 'ave_value': -25.338427044805307, 'soft_opc': nan} step=14792




2022-04-20 19:44.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.09 [info     ] FQE_20220420194333: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015446820924448412, 'time_algorithm_update': 0.00206831929295562, 'loss': 0.5286514341376375, 'time_step': 0.002296205869940824, 'init_value': -16.746219635009766, 'ave_value': -25.631561489430098, 'soft_opc': nan} step=15136




2022-04-20 19:44.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.10 [info     ] FQE_20220420194333: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015547940897387127, 'time_algorithm_update': 0.0020797315031983133, 'loss': 0.5476970305276472, 'time_step': 0.0023084414559741352, 'init_value': -16.702579498291016, 'ave_value': -25.78624542136383, 'soft_opc': nan} step=15480




2022-04-20 19:44.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.11 [info     ] FQE_20220420194333: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015875489212745843, 'time_algorithm_update': 0.0020877018917438597, 'loss': 0.5497381516602323, 'time_step': 0.0023191294004750807, 'init_value': -16.66897201538086, 'ave_value': -25.841497985780375, 'soft_opc': nan} step=15824




2022-04-20 19:44.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.12 [info     ] FQE_20220420194333: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015466712241949036, 'time_algorithm_update': 0.002056619455648023, 'loss': 0.5665023256049946, 'time_step': 0.002282033826029578, 'init_value': -16.793739318847656, 'ave_value': -26.17694974759476, 'soft_opc': nan} step=16168




2022-04-20 19:44.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.13 [info     ] FQE_20220420194333: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015825587649678075, 'time_algorithm_update': 0.0020838830360146456, 'loss': 0.5917041790238473, 'time_step': 0.002312572196472523, 'init_value': -16.596500396728516, 'ave_value': -26.336315685951966, 'soft_opc': nan} step=16512




2022-04-20 19:44.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.14 [info     ] FQE_20220420194333: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001645018888074298, 'time_algorithm_update': 0.002162569484045339, 'loss': 0.5955107442731428, 'time_step': 0.0024007967738218085, 'init_value': -16.316967010498047, 'ave_value': -26.28248348226687, 'soft_opc': nan} step=16856




2022-04-20 19:44.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:44.15 [info     ] FQE_20220420194333: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001565543718116228, 'time_algorithm_update': 0.002076251562251601, 'loss': 0.6056651212166735, 'time_step': 0.002303241297256115, 'init_value': -16.593246459960938, 'ave_value': -26.66211283243186, 'soft_opc': nan} step=17200




2022-04-20 19:44.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194333/model_17200.pt
search iteration:  30
using hyper params:  [0.00818745780056889, 0.0001408039588777811, 9.959897302444231e-05, 5]
2022-04-20 19:44.15 [debug    ] RoundIterator is selected.
2022-04-20 19:44.15 [info     ] Directory is created at d3rlpy_logs/CQL_20220420194415
2022-04-20 19:44.15 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:44.15 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:44.15 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420194415/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.00818745780056889, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'wei

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.21 [info     ] CQL_20220420194415: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00034958437869423315, 'time_algorithm_update': 0.01768511150315491, 'temp_loss': 4.8753644942540175, 'temp': 0.9843340831541876, 'alpha_loss': -17.601246861686484, 'alpha': 1.0175776122606288, 'critic_loss': 96.84235850551673, 'actor_loss': 2.193785951805045, 'time_step': 0.018132098236976313, 'td_error': 1.019111754340333, 'init_value': -6.577020168304443, 'ave_value': -6.535898429810464} step=342
2022-04-20 19:44.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.28 [info     ] CQL_20220420194415: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00035611230727524784, 'time_algorithm_update': 0.017653489670558284, 'temp_loss': 4.676152842783789, 'temp': 0.9521255357223645, 'alpha_loss': -18.414758431284053, 'alpha': 1.0536061915737844, 'critic_loss': 102.77026248954193, 'actor_loss': 7.131569972512318, 'time_step': 0.01810772446861044, 'td_error': 3.3262483046534834, 'init_value': -9.846360206604004, 'ave_value': -9.148432151004} step=684
2022-04-20 19:44.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.34 [info     ] CQL_20220420194415: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035679688927722954, 'time_algorithm_update': 0.017754948627181917, 'temp_loss': 4.142148744990254, 'temp': 0.922529819764589, 'alpha_loss': -20.088899913587067, 'alpha': 1.0923780283035591, 'critic_loss': 111.14213907788371, 'actor_loss': 10.333145939118682, 'time_step': 0.018213569072254916, 'td_error': 6.677469285620953, 'init_value': -12.928311347961426, 'ave_value': -11.27280415412542} step=1026
2022-04-20 19:44.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.40 [info     ] CQL_20220420194415: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035273680212902046, 'time_algorithm_update': 0.017744829780177065, 'temp_loss': 3.8128954681039553, 'temp': 0.8953140137488382, 'alpha_loss': -20.890329963282536, 'alpha': 1.133463308476565, 'critic_loss': 115.7718171906053, 'actor_loss': 12.429448320154558, 'time_step': 0.018191157028689023, 'td_error': 7.413663108170262, 'init_value': -15.498529434204102, 'ave_value': -12.812737760243115} step=1368
2022-04-20 19:44.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.47 [info     ] CQL_20220420194415: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003493529314186141, 'time_algorithm_update': 0.01749012135622794, 'temp_loss': 3.5831734227855305, 'temp': 0.8691683345370822, 'alpha_loss': -21.10112693295841, 'alpha': 1.1754883025821887, 'critic_loss': 120.89111180891071, 'actor_loss': 14.233076762037667, 'time_step': 0.017929605573241473, 'td_error': 6.6463855611089615, 'init_value': -18.09489631652832, 'ave_value': -14.748661382874927} step=1710
2022-04-20 19:44.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:44.53 [info     ] CQL_20220420194415: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003478401585629112, 'time_algorithm_update': 0.017378014430665133, 'temp_loss': 3.4460777351033616, 'temp': 0.8437016355363947, 'alpha_loss': -20.595211374829386, 'alpha': 1.2177199669051588, 'critic_loss': 128.1645703455161, 'actor_loss': 16.599859812106306, 'time_step': 0.017818798098647802, 'td_error': 6.2480539575976906, 'init_value': -21.363372802734375, 'ave_value': -17.359563433050035} step=2052
2022-04-20 19:44.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.00 [info     ] CQL_20220420194415: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003530079858344898, 'time_algorithm_update': 0.017408172289530437, 'temp_loss': 3.302795465926678, 'temp': 0.8187830643347133, 'alpha_loss': -19.52481554823312, 'alpha': 1.2596324884403518, 'critic_loss': 136.7988749052349, 'actor_loss': 19.399214856108728, 'time_step': 0.017852222013194658, 'td_error': 6.301852533449072, 'init_value': -24.71533966064453, 'ave_value': -20.006672926244434} step=2394
2022-04-20 19:45.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.06 [info     ] CQL_20220420194415: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034588052515397994, 'time_algorithm_update': 0.017441661037199678, 'temp_loss': 3.125565745677167, 'temp': 0.794724877989083, 'alpha_loss': -18.075142821373298, 'alpha': 1.3007319416219032, 'critic_loss': 145.73525599429482, 'actor_loss': 22.500621483339902, 'time_step': 0.017879542551542584, 'td_error': 6.888755717135172, 'init_value': -29.203907012939453, 'ave_value': -23.260157722159548} step=2736
2022-04-20 19:45.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.12 [info     ] CQL_20220420194415: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000345345826176872, 'time_algorithm_update': 0.017629350137989424, 'temp_loss': 2.9301933921568577, 'temp': 0.7716930817093766, 'alpha_loss': -16.783254263693827, 'alpha': 1.3411243209364818, 'critic_loss': 153.84212315431114, 'actor_loss': 25.73095582660876, 'time_step': 0.018069318163464642, 'td_error': 7.687689912289422, 'init_value': -33.56523132324219, 'ave_value': -26.38640690197816} step=3078
2022-04-20 19:45.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.19 [info     ] CQL_20220420194415: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035028429756387634, 'time_algorithm_update': 0.017457646933215405, 'temp_loss': 2.757162003489266, 'temp': 0.7496316732718931, 'alpha_loss': -15.620701739662572, 'alpha': 1.3815778110459533, 'critic_loss': 160.54501400774683, 'actor_loss': 28.995839135688648, 'time_step': 0.01789977815416124, 'td_error': 9.039281101017465, 'init_value': -37.74333572387695, 'ave_value': -29.138742869721877} step=3420
2022-04-20 19:45.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.25 [info     ] CQL_20220420194415: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00034242485001770377, 'time_algorithm_update': 0.017408900790744357, 'temp_loss': 2.583784642275314, 'temp': 0.7284488519381361, 'alpha_loss': -14.288048752567224, 'alpha': 1.42194990735305, 'critic_loss': 165.75680930154365, 'actor_loss': 32.40811377519753, 'time_step': 0.017845163568418625, 'td_error': 11.017727943001551, 'init_value': -42.707618713378906, 'ave_value': -32.420914362040726} step=3762
2022-04-20 19:45.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.31 [info     ] CQL_20220420194415: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003503296110365126, 'time_algorithm_update': 0.017432438699822677, 'temp_loss': 2.398809331899498, 'temp': 0.7082118757984095, 'alpha_loss': -12.97577360777827, 'alpha': 1.4618660612413061, 'critic_loss': 169.33204931961862, 'actor_loss': 35.80944544251202, 'time_step': 0.017874362175924738, 'td_error': 13.063445024430665, 'init_value': -47.9271354675293, 'ave_value': -35.497540084910284} step=4104
2022-04-20 19:45.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.38 [info     ] CQL_20220420194415: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003451631780256305, 'time_algorithm_update': 0.017366743227194625, 'temp_loss': 2.2226915450124016, 'temp': 0.6888669823345385, 'alpha_loss': -11.815601075601856, 'alpha': 1.5016029233123824, 'critic_loss': 172.3821541412532, 'actor_loss': 39.128094388727554, 'time_step': 0.017802914680793272, 'td_error': 15.744779014208069, 'init_value': -53.050682067871094, 'ave_value': -38.47077378201324} step=4446
2022-04-20 19:45.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.44 [info     ] CQL_20220420194415: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00034877152470817345, 'time_algorithm_update': 0.017355107424551982, 'temp_loss': 2.0620527563736455, 'temp': 0.6703578329574295, 'alpha_loss': -10.794371844732273, 'alpha': 1.5415089252399423, 'critic_loss': 174.4922931068822, 'actor_loss': 42.31606574922974, 'time_step': 0.017795898063838134, 'td_error': 18.777332287734747, 'init_value': -58.60316848754883, 'ave_value': -41.44309478012023} step=4788
2022-04-20 19:45.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.50 [info     ] CQL_20220420194415: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003476721501489829, 'time_algorithm_update': 0.01734078557867753, 'temp_loss': 1.9039384346259267, 'temp': 0.6525345988440931, 'alpha_loss': -9.694204925793654, 'alpha': 1.5814770205676207, 'critic_loss': 175.65878960681937, 'actor_loss': 45.29943415993139, 'time_step': 0.017783742899086043, 'td_error': 21.592246527034472, 'init_value': -63.69843292236328, 'ave_value': -44.02257828061108} step=5130
2022-04-20 19:45.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:45.57 [info     ] CQL_20220420194415: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003550917084454096, 'time_algorithm_update': 0.017664051195334273, 'temp_loss': 1.7464705475589686, 'temp': 0.6354700656313645, 'alpha_loss': -8.672544519803678, 'alpha': 1.6212944468559578, 'critic_loss': 176.0681039464404, 'actor_loss': 48.079976455510014, 'time_step': 0.018113905226278027, 'td_error': 24.54423324734096, 'init_value': -68.37606048583984, 'ave_value': -46.51475773172604} step=5472
2022-04-20 19:45.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.03 [info     ] CQL_20220420194415: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00034874224523354693, 'time_algorithm_update': 0.018397242005108394, 'temp_loss': 1.5871806796531231, 'temp': 0.6191893870021865, 'alpha_loss': -7.765986763943008, 'alpha': 1.6609873684526186, 'critic_loss': 175.19669846205684, 'actor_loss': 50.487644418638354, 'time_step': 0.018843558796665126, 'td_error': 26.996595774732462, 'init_value': -72.17166900634766, 'ave_value': -48.25000408499896} step=5814
2022-04-20 19:46.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.10 [info     ] CQL_20220420194415: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00035371139035587424, 'time_algorithm_update': 0.018262063550670244, 'temp_loss': 1.4245090955182125, 'temp': 0.603738574605239, 'alpha_loss': -6.936539629049468, 'alpha': 1.7003439745010689, 'critic_loss': 173.02188636824403, 'actor_loss': 52.471284174779704, 'time_step': 0.01871437914887367, 'td_error': 29.894782282543282, 'init_value': -75.80076599121094, 'ave_value': -49.72159898208605} step=6156
2022-04-20 19:46.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.17 [info     ] CQL_20220420194415: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003510783290305333, 'time_algorithm_update': 0.01791205461959393, 'temp_loss': 1.29455999777331, 'temp': 0.5890163296495962, 'alpha_loss': -6.261870076781825, 'alpha': 1.7402806515582123, 'critic_loss': 170.4340879652235, 'actor_loss': 54.174852906611925, 'time_step': 0.018362674796790407, 'td_error': 32.23310042623697, 'init_value': -79.29200744628906, 'ave_value': -51.429721539611215} step=6498
2022-04-20 19:46.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.23 [info     ] CQL_20220420194415: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003492239623041878, 'time_algorithm_update': 0.018099006853605572, 'temp_loss': 1.1842100855551267, 'temp': 0.5748242929316404, 'alpha_loss': -5.7957222294389155, 'alpha': 1.7810182710837204, 'critic_loss': 167.49387386388946, 'actor_loss': 55.525001682036105, 'time_step': 0.01854612952784488, 'td_error': 34.47443002691704, 'init_value': -82.37062072753906, 'ave_value': -52.49496605336532} step=6840
2022-04-20 19:46.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.30 [info     ] CQL_20220420194415: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003467080188773529, 'time_algorithm_update': 0.018040110493264004, 'temp_loss': 1.0767561416876943, 'temp': 0.5610754453299338, 'alpha_loss': -5.37621579811587, 'alpha': 1.8233357967689023, 'critic_loss': 165.24090986642224, 'actor_loss': 56.64457080238744, 'time_step': 0.018484726286771006, 'td_error': 36.978664104832845, 'init_value': -84.77262878417969, 'ave_value': -53.30342046450387} step=7182
2022-04-20 19:46.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.37 [info     ] CQL_20220420194415: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003667860700373064, 'time_algorithm_update': 0.01966600529631676, 'temp_loss': 0.9990997344081165, 'temp': 0.5476666080672838, 'alpha_loss': -4.993577585582845, 'alpha': 1.8669576829637002, 'critic_loss': 163.2953091894674, 'actor_loss': 57.67701074254443, 'time_step': 0.020134698577791627, 'td_error': 38.81105139068322, 'init_value': -86.67327117919922, 'ave_value': -54.064322053261165} step=7524
2022-04-20 19:46.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.44 [info     ] CQL_20220420194415: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00036290235686720464, 'time_algorithm_update': 0.01949499155345716, 'temp_loss': 0.9292393556812353, 'temp': 0.5343651684404117, 'alpha_loss': -4.572556366000259, 'alpha': 1.9119991991255019, 'critic_loss': 161.4266279343276, 'actor_loss': 58.572750526562075, 'time_step': 0.019957210585387828, 'td_error': 41.23373709613362, 'init_value': -88.31963348388672, 'ave_value': -54.490409251711924} step=7866
2022-04-20 19:46.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.51 [info     ] CQL_20220420194415: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003500946781091523, 'time_algorithm_update': 0.018748516227766784, 'temp_loss': 0.863213479693173, 'temp': 0.5213963971500508, 'alpha_loss': -4.210261936599051, 'alpha': 1.957968090361322, 'critic_loss': 160.10543725086234, 'actor_loss': 59.33691492136459, 'time_step': 0.019199654372812014, 'td_error': 43.014662836299934, 'init_value': -89.91236877441406, 'ave_value': -55.2521142922221} step=8208
2022-04-20 19:46.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:46.57 [info     ] CQL_20220420194415: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003612403981169762, 'time_algorithm_update': 0.018371759799488803, 'temp_loss': 0.8012925339372534, 'temp': 0.5085513039290557, 'alpha_loss': -3.892373319726154, 'alpha': 2.0053330217188563, 'critic_loss': 159.11043709202818, 'actor_loss': 60.01065985919439, 'time_step': 0.018833157611869232, 'td_error': 44.7556038955901, 'init_value': -91.20733642578125, 'ave_value': -55.71411634672936} step=8550
2022-04-20 19:46.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.04 [info     ] CQL_20220420194415: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035576025644938153, 'time_algorithm_update': 0.01821163941545096, 'temp_loss': 0.7375285405861703, 'temp': 0.49610734709173615, 'alpha_loss': -3.5689739028743483, 'alpha': 2.0538195803848622, 'critic_loss': 158.30295870596902, 'actor_loss': 60.62241636800487, 'time_step': 0.01866996915716874, 'td_error': 46.48122649993589, 'init_value': -92.53627014160156, 'ave_value': -56.07984474639635} step=8892
2022-04-20 19:47.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.11 [info     ] CQL_20220420194415: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00034441097437986853, 'time_algorithm_update': 0.017963409423828125, 'temp_loss': 0.6818424586141318, 'temp': 0.4839068876895291, 'alpha_loss': -3.2360138808600385, 'alpha': 2.1018582178138154, 'critic_loss': 158.02186388160752, 'actor_loss': 61.16232896548266, 'time_step': 0.018401835396973012, 'td_error': 47.92673630083393, 'init_value': -93.63426208496094, 'ave_value': -56.42945910358013} step=9234
2022-04-20 19:47.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.17 [info     ] CQL_20220420194415: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035188351458276224, 'time_algorithm_update': 0.0180420248132003, 'temp_loss': 0.6200955206016351, 'temp': 0.47211603001195784, 'alpha_loss': -2.9004055408254397, 'alpha': 2.1499569701869587, 'critic_loss': 157.92093397441664, 'actor_loss': 61.637588902523646, 'time_step': 0.018492649870309217, 'td_error': 49.73469747405707, 'init_value': -94.68511199951172, 'ave_value': -57.07770305692344} step=9576
2022-04-20 19:47.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.24 [info     ] CQL_20220420194415: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00034946516940468237, 'time_algorithm_update': 0.017902724924143295, 'temp_loss': 0.5786010399896498, 'temp': 0.4606167609580079, 'alpha_loss': -2.571611077742575, 'alpha': 2.1987406468530843, 'critic_loss': 158.39951868782268, 'actor_loss': 62.09974177399574, 'time_step': 0.018349937528197528, 'td_error': 50.66886400335839, 'init_value': -94.9315414428711, 'ave_value': -56.98972919127627} step=9918
2022-04-20 19:47.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.30 [info     ] CQL_20220420194415: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00034969731381064966, 'time_algorithm_update': 0.017676664374725162, 'temp_loss': 0.5256069587510929, 'temp': 0.4494452274333664, 'alpha_loss': -2.165572319751638, 'alpha': 2.2440717352761164, 'critic_loss': 158.57459406267134, 'actor_loss': 62.45690765157778, 'time_step': 0.01812262074989185, 'td_error': 52.24784837110002, 'init_value': -95.97267150878906, 'ave_value': -57.56464820064202} step=10260
2022-04-20 19:47.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.37 [info     ] CQL_20220420194415: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00035118080719172604, 'time_algorithm_update': 0.01760057120295296, 'temp_loss': 0.4790397058104911, 'temp': 0.4386405396705482, 'alpha_loss': -1.804388411778804, 'alpha': 2.2874249965823883, 'critic_loss': 159.14316723918358, 'actor_loss': 62.76531492757518, 'time_step': 0.018050146381757413, 'td_error': 53.10172151539202, 'init_value': -96.35801696777344, 'ave_value': -57.66889280192554} step=10602
2022-04-20 19:47.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.43 [info     ] CQL_20220420194415: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003517399057310227, 'time_algorithm_update': 0.01744783970347622, 'temp_loss': 0.43332145097311475, 'temp': 0.42833886169202146, 'alpha_loss': -1.40128369891692, 'alpha': 2.3249161731429964, 'critic_loss': 159.64680284645127, 'actor_loss': 63.07539937649554, 'time_step': 0.01789554448155632, 'td_error': 54.290208370886944, 'init_value': -96.82124328613281, 'ave_value': -57.697857926000616} step=10944
2022-04-20 19:47.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.49 [info     ] CQL_20220420194415: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003449658901370757, 'time_algorithm_update': 0.017564758919832998, 'temp_loss': 0.39316725282118337, 'temp': 0.4182619165781646, 'alpha_loss': -1.0326426450993011, 'alpha': 2.356055719113489, 'critic_loss': 160.1065144232142, 'actor_loss': 63.34944392644871, 'time_step': 0.018006031973320142, 'td_error': 55.15016487558479, 'init_value': -97.69186401367188, 'ave_value': -58.30924916926872} step=11286
2022-04-20 19:47.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:47.56 [info     ] CQL_20220420194415: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00034213832944457294, 'time_algorithm_update': 0.017368140973542865, 'temp_loss': 0.35830973358879314, 'temp': 0.4085629733159528, 'alpha_loss': -0.6182846380833383, 'alpha': 2.3821543964029055, 'critic_loss': 160.59309152971235, 'actor_loss': 63.6093380465145, 'time_step': 0.017805776400872837, 'td_error': 56.01327003165313, 'init_value': -98.07572174072266, 'ave_value': -58.45066072993484} step=11628
2022-04-20 19:47.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.02 [info     ] CQL_20220420194415: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.00034256915599979157, 'time_algorithm_update': 0.017256609180517364, 'temp_loss': 0.31671453672542904, 'temp': 0.39927573249354, 'alpha_loss': -0.11021321935154367, 'alpha': 2.3935056018550496, 'critic_loss': 160.82235307303088, 'actor_loss': 63.82635480200338, 'time_step': 0.01769489642472295, 'td_error': 56.84661163959028, 'init_value': -98.30708312988281, 'ave_value': -58.517549991490874} step=11970
2022-04-20 19:48.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.08 [info     ] CQL_20220420194415: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00034551662311219333, 'time_algorithm_update': 0.017443554443225526, 'temp_loss': 0.2783202439633726, 'temp': 0.3904157646915369, 'alpha_loss': 0.2634951292080342, 'alpha': 2.3923588677456507, 'critic_loss': 160.98740429348416, 'actor_loss': 63.99586582741542, 'time_step': 0.0178838180519684, 'td_error': 57.61959912173788, 'init_value': -98.4577865600586, 'ave_value': -58.46402913883698} step=12312
2022-04-20 19:48.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.15 [info     ] CQL_20220420194415: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003488300836574264, 'time_algorithm_update': 0.01743928521697284, 'temp_loss': 0.24704639478559381, 'temp': 0.38219716317123836, 'alpha_loss': 0.666049487575649, 'alpha': 2.372208565996404, 'critic_loss': 160.8500198007327, 'actor_loss': 64.14244386327196, 'time_step': 0.017883088156493785, 'td_error': 58.30659271771697, 'init_value': -98.60050201416016, 'ave_value': -58.5270062153179} step=12654
2022-04-20 19:48.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.21 [info     ] CQL_20220420194415: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003471555765609295, 'time_algorithm_update': 0.017605775281002645, 'temp_loss': 0.218023197458545, 'temp': 0.37406530244308606, 'alpha_loss': 0.9967045533165815, 'alpha': 2.343682325374313, 'critic_loss': 161.00051971346315, 'actor_loss': 64.23897759956226, 'time_step': 0.018048789766099717, 'td_error': 59.139756878587924, 'init_value': -99.1202392578125, 'ave_value': -58.77087283776452} step=12996
2022-04-20 19:48.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.27 [info     ] CQL_20220420194415: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003440854145072357, 'time_algorithm_update': 0.017294811226471127, 'temp_loss': 0.18605642699315186, 'temp': 0.3665198728180768, 'alpha_loss': 1.3915792868840433, 'alpha': 2.2976381925114415, 'critic_loss': 161.04736142967178, 'actor_loss': 64.39453856708013, 'time_step': 0.01773462867179112, 'td_error': 59.47126157437852, 'init_value': -98.94623565673828, 'ave_value': -58.668333628972704} step=13338
2022-04-20 19:48.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.34 [info     ] CQL_20220420194415: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00034417046440972225, 'time_algorithm_update': 0.01735415165884453, 'temp_loss': 0.17267360302418122, 'temp': 0.359292335329, 'alpha_loss': 1.62725959318739, 'alpha': 2.241997815015023, 'critic_loss': 160.7383917200635, 'actor_loss': 64.5580362799572, 'time_step': 0.017793078171579463, 'td_error': 59.892687254387276, 'init_value': -99.01900482177734, 'ave_value': -58.79890533860924} step=13680
2022-04-20 19:48.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.40 [info     ] CQL_20220420194415: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00034400594164753516, 'time_algorithm_update': 0.01737441444954677, 'temp_loss': 0.14896140129942642, 'temp': 0.3522441986708613, 'alpha_loss': 1.830085624892187, 'alpha': 2.181882129775153, 'critic_loss': 160.70166421633715, 'actor_loss': 64.63202633774071, 'time_step': 0.017814323218942385, 'td_error': 60.487151735733086, 'init_value': -99.01356506347656, 'ave_value': -58.9830711644653} step=14022
2022-04-20 19:48.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.46 [info     ] CQL_20220420194415: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033997861962569386, 'time_algorithm_update': 0.01733571325826366, 'temp_loss': 0.12393658586412842, 'temp': 0.3456888573560101, 'alpha_loss': 2.009994795355314, 'alpha': 2.1196500341794646, 'critic_loss': 160.69393144574082, 'actor_loss': 64.72886310822783, 'time_step': 0.017770203233462328, 'td_error': 60.80307888357478, 'init_value': -99.05088806152344, 'ave_value': -59.00084669681887} step=14364
2022-04-20 19:48.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.53 [info     ] CQL_20220420194415: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003416154816833853, 'time_algorithm_update': 0.017287655183446337, 'temp_loss': 0.11310496054894743, 'temp': 0.3393440180354648, 'alpha_loss': 2.18638118048204, 'alpha': 2.0573990059177776, 'critic_loss': 160.33211082324647, 'actor_loss': 64.82504947283114, 'time_step': 0.017725651027166354, 'td_error': 61.24162199974521, 'init_value': -99.05110931396484, 'ave_value': -59.15975707592795} step=14706
2022-04-20 19:48.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:48.59 [info     ] CQL_20220420194415: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003420002976356194, 'time_algorithm_update': 0.017402508802581252, 'temp_loss': 0.09133758132580166, 'temp': 0.3334157923508806, 'alpha_loss': 2.3231469912247524, 'alpha': 1.9948308398151955, 'critic_loss': 160.13021212572244, 'actor_loss': 64.92232650622987, 'time_step': 0.017840606427332113, 'td_error': 61.56995136281718, 'init_value': -98.91089630126953, 'ave_value': -59.28453820209984} step=15048
2022-04-20 19:48.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.05 [info     ] CQL_20220420194415: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033995422006350513, 'time_algorithm_update': 0.017293780170686064, 'temp_loss': 0.07128590511359623, 'temp': 0.32821606526597896, 'alpha_loss': 2.427434815785559, 'alpha': 1.9344515786533467, 'critic_loss': 159.7912917109261, 'actor_loss': 64.96465429785655, 'time_step': 0.017730526059691668, 'td_error': 61.6615455470866, 'init_value': -98.7386474609375, 'ave_value': -59.404612139814475} step=15390
2022-04-20 19:49.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.11 [info     ] CQL_20220420194415: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003494784148812991, 'time_algorithm_update': 0.017308270722104793, 'temp_loss': 0.06993983216632746, 'temp': 0.32335485962399263, 'alpha_loss': 2.4656325112715125, 'alpha': 1.8739196971843117, 'critic_loss': 159.23884714137742, 'actor_loss': 65.06245089971532, 'time_step': 0.01775313817966751, 'td_error': 61.993886781893124, 'init_value': -98.46148681640625, 'ave_value': -59.53158998552911} step=15732
2022-04-20 19:49.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.18 [info     ] CQL_20220420194415: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00034806184601365473, 'time_algorithm_update': 0.017336284905149227, 'temp_loss': 0.05544174802271726, 'temp': 0.31880396421541246, 'alpha_loss': 2.6293975043802233, 'alpha': 1.8131871031738862, 'critic_loss': 158.6803266067951, 'actor_loss': 65.14561504787869, 'time_step': 0.017782221063535813, 'td_error': 61.943685491021476, 'init_value': -98.01469421386719, 'ave_value': -59.34798842588893} step=16074
2022-04-20 19:49.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.24 [info     ] CQL_20220420194415: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003438497844495271, 'time_algorithm_update': 0.017351968943724157, 'temp_loss': 0.040726127774231353, 'temp': 0.3145877573399516, 'alpha_loss': 2.6651757557220126, 'alpha': 1.7562558522001344, 'critic_loss': 158.33756874039855, 'actor_loss': 65.25945650067246, 'time_step': 0.017791347196924757, 'td_error': 62.49054924406728, 'init_value': -98.35443115234375, 'ave_value': -59.84077814967279} step=16416
2022-04-20 19:49.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.30 [info     ] CQL_20220420194415: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00034465706139280084, 'time_algorithm_update': 0.017444183951929995, 'temp_loss': 0.030594159290739144, 'temp': 0.3112530925294809, 'alpha_loss': 2.700815869898301, 'alpha': 1.7015729546546936, 'critic_loss': 157.63390899122808, 'actor_loss': 65.363321917796, 'time_step': 0.017884705498901723, 'td_error': 62.35995824854998, 'init_value': -97.65858459472656, 'ave_value': -59.37079208500516} step=16758
2022-04-20 19:49.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:49.37 [info     ] CQL_20220420194415: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00034745115982858757, 'time_algorithm_update': 0.017484370727985227, 'temp_loss': 0.027305627876228235, 'temp': 0.30808806157948676, 'alpha_loss': 2.751028048315723, 'alpha': 1.6488639363768505, 'critic_loss': 156.54431185806007, 'actor_loss': 65.39485692699053, 'time_step': 0.017925892657006694, 'td_error': 62.48003235041369, 'init_value': -97.74122619628906, 'ave_value': -59.74811368949644} step=17100
2022-04-20 19:49.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420194415/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:49.37 [info     ] FQE_20220420194937: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001422008836125753, 'time_algorithm_update': 0.0019994816148137472, 'loss': 0.008670582430414766, 'time_step': 0.002209450825151191, 'init_value': -0.28593379259109497, 'ave_value': -0.22289272780353958, 'soft_opc': nan} step=166




2022-04-20 19:49.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.38 [info     ] FQE_20220420194937: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014014129179069795, 'time_algorithm_update': 0.001966329942266625, 'loss': 0.0067685332853369504, 'time_step': 0.002173769904906491, 'init_value': -0.4739774465560913, 'ave_value': -0.3474882481200201, 'soft_opc': nan} step=332




2022-04-20 19:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.38 [info     ] FQE_20220420194937: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014170537511986423, 'time_algorithm_update': 0.0019469318619693618, 'loss': 0.0060502885622028485, 'time_step': 0.002149162522281509, 'init_value': -0.5240589380264282, 'ave_value': -0.35584245256341257, 'soft_opc': nan} step=498




2022-04-20 19:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.39 [info     ] FQE_20220420194937: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014508345040930323, 'time_algorithm_update': 0.0020454389503203243, 'loss': 0.006105747330563524, 'time_step': 0.002256636159965791, 'init_value': -0.6456338167190552, 'ave_value': -0.43058557857398516, 'soft_opc': nan} step=664




2022-04-20 19:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.39 [info     ] FQE_20220420194937: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014005368014416062, 'time_algorithm_update': 0.0019813675478280307, 'loss': 0.005861948673467499, 'time_step': 0.0021917216749076382, 'init_value': -0.7252370119094849, 'ave_value': -0.46583812585486484, 'soft_opc': nan} step=830




2022-04-20 19:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.39 [info     ] FQE_20220420194937: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014111650995461336, 'time_algorithm_update': 0.001993870160665857, 'loss': 0.005764157191502401, 'time_step': 0.0021995995418134942, 'init_value': -0.7745923399925232, 'ave_value': -0.5016727642835798, 'soft_opc': nan} step=996




2022-04-20 19:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.40 [info     ] FQE_20220420194937: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001399545784456184, 'time_algorithm_update': 0.0019720304443175533, 'loss': 0.0057254418287516, 'time_step': 0.0021786761571125812, 'init_value': -0.8443881273269653, 'ave_value': -0.5321084052492101, 'soft_opc': nan} step=1162




2022-04-20 19:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.40 [info     ] FQE_20220420194937: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014252547758171358, 'time_algorithm_update': 0.002037487834332937, 'loss': 0.005586725676499577, 'time_step': 0.0022471295781882413, 'init_value': -0.9326318502426147, 'ave_value': -0.5865484137322988, 'soft_opc': nan} step=1328




2022-04-20 19:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.41 [info     ] FQE_20220420194937: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013813627771584383, 'time_algorithm_update': 0.0020036726112825326, 'loss': 0.005512844886159502, 'time_step': 0.002209970750004412, 'init_value': -0.985256552696228, 'ave_value': -0.5949540489865047, 'soft_opc': nan} step=1494




2022-04-20 19:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.41 [info     ] FQE_20220420194937: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014087234634950938, 'time_algorithm_update': 0.0019644225936338126, 'loss': 0.005679024609243385, 'time_step': 0.002169563109616199, 'init_value': -1.0545498132705688, 'ave_value': -0.6136903003361579, 'soft_opc': nan} step=1660




2022-04-20 19:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.41 [info     ] FQE_20220420194937: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001430138047919216, 'time_algorithm_update': 0.00201951164797128, 'loss': 0.005705145820947252, 'time_step': 0.002229980675570936, 'init_value': -1.1096103191375732, 'ave_value': -0.6501892363333756, 'soft_opc': nan} step=1826




2022-04-20 19:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.42 [info     ] FQE_20220420194937: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014215923217405756, 'time_algorithm_update': 0.002045584012226886, 'loss': 0.005736887160441782, 'time_step': 0.002257176192410021, 'init_value': -1.1184992790222168, 'ave_value': -0.6300300971311223, 'soft_opc': nan} step=1992




2022-04-20 19:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.42 [info     ] FQE_20220420194937: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014300518725291793, 'time_algorithm_update': 0.0020094104560024768, 'loss': 0.005822619221997952, 'time_step': 0.0022182579500129424, 'init_value': -1.2102675437927246, 'ave_value': -0.6949749385015116, 'soft_opc': nan} step=2158




2022-04-20 19:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.43 [info     ] FQE_20220420194937: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014202566031950065, 'time_algorithm_update': 0.0019607673208397553, 'loss': 0.005985967749701701, 'time_step': 0.002169478370482663, 'init_value': -1.3080580234527588, 'ave_value': -0.7418259897630091, 'soft_opc': nan} step=2324




2022-04-20 19:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.43 [info     ] FQE_20220420194937: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014717894864369588, 'time_algorithm_update': 0.002005421971700278, 'loss': 0.006191648444699415, 'time_step': 0.0022215584674513482, 'init_value': -1.2806413173675537, 'ave_value': -0.697260075797503, 'soft_opc': nan} step=2490




2022-04-20 19:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.44 [info     ] FQE_20220420194937: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014032369636627566, 'time_algorithm_update': 0.002008988196591297, 'loss': 0.006807141377680363, 'time_step': 0.0022148798747235036, 'init_value': -1.4287463426589966, 'ave_value': -0.8087356530465521, 'soft_opc': nan} step=2656




2022-04-20 19:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.44 [info     ] FQE_20220420194937: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014336999640407333, 'time_algorithm_update': 0.0019866055752857624, 'loss': 0.007363904261939138, 'time_step': 0.0021974451570625766, 'init_value': -1.4687368869781494, 'ave_value': -0.8293987455333138, 'soft_opc': nan} step=2822




2022-04-20 19:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.44 [info     ] FQE_20220420194937: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001353499401046569, 'time_algorithm_update': 0.0018304643860782485, 'loss': 0.007481774579627013, 'time_step': 0.0020285370838211245, 'init_value': -1.4528127908706665, 'ave_value': -0.7690261357136675, 'soft_opc': nan} step=2988




2022-04-20 19:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.45 [info     ] FQE_20220420194937: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014229998531111753, 'time_algorithm_update': 0.0020038420895496047, 'loss': 0.008031507572495794, 'time_step': 0.0022128346454666323, 'init_value': -1.544785737991333, 'ave_value': -0.8511197758452581, 'soft_opc': nan} step=3154




2022-04-20 19:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.45 [info     ] FQE_20220420194937: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014276245990431453, 'time_algorithm_update': 0.0020147504576717517, 'loss': 0.008596860101665315, 'time_step': 0.002221956310502018, 'init_value': -1.663910984992981, 'ave_value': -0.9495579832879542, 'soft_opc': nan} step=3320




2022-04-20 19:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.46 [info     ] FQE_20220420194937: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001409010714795216, 'time_algorithm_update': 0.0019940109138029166, 'loss': 0.009362335049642631, 'time_step': 0.002202598445386772, 'init_value': -1.752885341644287, 'ave_value': -0.9889581874348559, 'soft_opc': nan} step=3486




2022-04-20 19:49.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.46 [info     ] FQE_20220420194937: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014226982392460467, 'time_algorithm_update': 0.0019856820623558686, 'loss': 0.010121382442611572, 'time_step': 0.0021978042211877294, 'init_value': -1.8531107902526855, 'ave_value': -1.0929610189822343, 'soft_opc': nan} step=3652




2022-04-20 19:49.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.46 [info     ] FQE_20220420194937: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014229998531111753, 'time_algorithm_update': 0.002002382852944983, 'loss': 0.010695737526409822, 'time_step': 0.002211193004286433, 'init_value': -1.9024317264556885, 'ave_value': -1.0861773012323424, 'soft_opc': nan} step=3818




2022-04-20 19:49.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.47 [info     ] FQE_20220420194937: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014361272375267673, 'time_algorithm_update': 0.002025387373315283, 'loss': 0.011364147282368222, 'time_step': 0.002231182822261948, 'init_value': -1.9367858171463013, 'ave_value': -1.0792972828085359, 'soft_opc': nan} step=3984




2022-04-20 19:49.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.47 [info     ] FQE_20220420194937: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014228849525911263, 'time_algorithm_update': 0.0020484795053321197, 'loss': 0.012207354026207005, 'time_step': 0.0022616357688444205, 'init_value': -2.029205799102783, 'ave_value': -1.1437088504165143, 'soft_opc': nan} step=4150




2022-04-20 19:49.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.48 [info     ] FQE_20220420194937: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014055206114987293, 'time_algorithm_update': 0.002016183841659362, 'loss': 0.013151287245676371, 'time_step': 0.002222160258925105, 'init_value': -2.0742154121398926, 'ave_value': -1.1671195039072553, 'soft_opc': nan} step=4316




2022-04-20 19:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.48 [info     ] FQE_20220420194937: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014413982988840127, 'time_algorithm_update': 0.0020223942147680075, 'loss': 0.01373518011266231, 'time_step': 0.002237118870378977, 'init_value': -2.1570322513580322, 'ave_value': -1.2184036268777139, 'soft_opc': nan} step=4482




2022-04-20 19:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.48 [info     ] FQE_20220420194937: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014369889914271343, 'time_algorithm_update': 0.002017160496079778, 'loss': 0.015093444789765426, 'time_step': 0.002228703843541892, 'init_value': -2.2984795570373535, 'ave_value': -1.289656985873306, 'soft_opc': nan} step=4648




2022-04-20 19:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.49 [info     ] FQE_20220420194937: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014212188950504166, 'time_algorithm_update': 0.0020422705684799746, 'loss': 0.015104979838396097, 'time_step': 0.0022550131901200995, 'init_value': -2.369354248046875, 'ave_value': -1.3047700678845784, 'soft_opc': nan} step=4814




2022-04-20 19:49.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.49 [info     ] FQE_20220420194937: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014111794621111398, 'time_algorithm_update': 0.0019930428769215046, 'loss': 0.016236155389959598, 'time_step': 0.002201002764414592, 'init_value': -2.3942646980285645, 'ave_value': -1.326931866547009, 'soft_opc': nan} step=4980




2022-04-20 19:49.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.50 [info     ] FQE_20220420194937: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001453592116574207, 'time_algorithm_update': 0.001997183604412768, 'loss': 0.016671308911691635, 'time_step': 0.0022136992718800007, 'init_value': -2.4796981811523438, 'ave_value': -1.3663570077771001, 'soft_opc': nan} step=5146




2022-04-20 19:49.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.50 [info     ] FQE_20220420194937: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014299369720091303, 'time_algorithm_update': 0.0019889911973332785, 'loss': 0.017592129145373017, 'time_step': 0.002198296857167439, 'init_value': -2.626798391342163, 'ave_value': -1.4785855905966716, 'soft_opc': nan} step=5312




2022-04-20 19:49.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.51 [info     ] FQE_20220420194937: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014448165893554688, 'time_algorithm_update': 0.002028130623231451, 'loss': 0.019077478281042753, 'time_step': 0.002241908785808517, 'init_value': -2.650707483291626, 'ave_value': -1.4638403603689627, 'soft_opc': nan} step=5478




2022-04-20 19:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.51 [info     ] FQE_20220420194937: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014276389616081514, 'time_algorithm_update': 0.002026725964373853, 'loss': 0.02033796981580174, 'time_step': 0.0022386484835521282, 'init_value': -2.78151535987854, 'ave_value': -1.52626160534921, 'soft_opc': nan} step=5644




2022-04-20 19:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.51 [info     ] FQE_20220420194937: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014649385429290403, 'time_algorithm_update': 0.002039182617003659, 'loss': 0.021162122518177628, 'time_step': 0.0022520458841898353, 'init_value': -2.8365814685821533, 'ave_value': -1.5930878325222848, 'soft_opc': nan} step=5810




2022-04-20 19:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.52 [info     ] FQE_20220420194937: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014581306871161404, 'time_algorithm_update': 0.0020617045551897533, 'loss': 0.022714201934601976, 'time_step': 0.0022740536425487103, 'init_value': -2.9034180641174316, 'ave_value': -1.576307583781513, 'soft_opc': nan} step=5976




2022-04-20 19:49.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.52 [info     ] FQE_20220420194937: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014712580715317325, 'time_algorithm_update': 0.0020249995840601175, 'loss': 0.023825407575209994, 'time_step': 0.002245790987129671, 'init_value': -3.06964111328125, 'ave_value': -1.686486352241791, 'soft_opc': nan} step=6142




2022-04-20 19:49.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.53 [info     ] FQE_20220420194937: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014936493103762707, 'time_algorithm_update': 0.0019924626292952574, 'loss': 0.024973569030407816, 'time_step': 0.002208872013781444, 'init_value': -3.0632760524749756, 'ave_value': -1.6473498768567503, 'soft_opc': nan} step=6308




2022-04-20 19:49.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.53 [info     ] FQE_20220420194937: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001448737569602139, 'time_algorithm_update': 0.0020180739552141673, 'loss': 0.02627307317761076, 'time_step': 0.0022316136992121317, 'init_value': -3.157599449157715, 'ave_value': -1.752186225569463, 'soft_opc': nan} step=6474




2022-04-20 19:49.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.53 [info     ] FQE_20220420194937: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014523425734186746, 'time_algorithm_update': 0.002094905060457896, 'loss': 0.026997533480984617, 'time_step': 0.002308499382202884, 'init_value': -3.3701539039611816, 'ave_value': -1.9084808561783115, 'soft_opc': nan} step=6640




2022-04-20 19:49.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.54 [info     ] FQE_20220420194937: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014479763536568148, 'time_algorithm_update': 0.002063777073320136, 'loss': 0.029336472530709017, 'time_step': 0.0022783523582550415, 'init_value': -3.3945045471191406, 'ave_value': -1.9406905520942297, 'soft_opc': nan} step=6806




2022-04-20 19:49.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.54 [info     ] FQE_20220420194937: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014394737151731928, 'time_algorithm_update': 0.001987710056534733, 'loss': 0.03016822422147695, 'time_step': 0.0022008074335305087, 'init_value': -3.480780601501465, 'ave_value': -1.9921720036716613, 'soft_opc': nan} step=6972




2022-04-20 19:49.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.55 [info     ] FQE_20220420194937: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014383103474076972, 'time_algorithm_update': 0.0019673051604305407, 'loss': 0.03045819865270372, 'time_step': 0.002178239535136395, 'init_value': -3.5930988788604736, 'ave_value': -2.0250853300463776, 'soft_opc': nan} step=7138




2022-04-20 19:49.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.55 [info     ] FQE_20220420194937: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014189352472144436, 'time_algorithm_update': 0.0019685705024075797, 'loss': 0.03160764870980014, 'time_step': 0.0021792377334043204, 'init_value': -3.6613409519195557, 'ave_value': -2.0776300356396145, 'soft_opc': nan} step=7304




2022-04-20 19:49.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.56 [info     ] FQE_20220420194937: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014988198337784732, 'time_algorithm_update': 0.0020855162517133966, 'loss': 0.03334041302518762, 'time_step': 0.0023065202207450406, 'init_value': -3.5925188064575195, 'ave_value': -1.9745229197447902, 'soft_opc': nan} step=7470




2022-04-20 19:49.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.56 [info     ] FQE_20220420194937: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001478137740169663, 'time_algorithm_update': 0.002102656536791698, 'loss': 0.03413665429717042, 'time_step': 0.0023175190730267262, 'init_value': -3.877639055252075, 'ave_value': -2.2003761799292856, 'soft_opc': nan} step=7636




2022-04-20 19:49.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.56 [info     ] FQE_20220420194937: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014707841068865304, 'time_algorithm_update': 0.002007477254752653, 'loss': 0.03634634284240878, 'time_step': 0.002223923981907856, 'init_value': -3.8619468212127686, 'ave_value': -2.1457386418270903, 'soft_opc': nan} step=7802




2022-04-20 19:49.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.57 [info     ] FQE_20220420194937: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014558326767151616, 'time_algorithm_update': 0.0020100337913237423, 'loss': 0.03840008041265993, 'time_step': 0.0022228309907108904, 'init_value': -3.9408838748931885, 'ave_value': -2.2134785343162915, 'soft_opc': nan} step=7968




2022-04-20 19:49.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.57 [info     ] FQE_20220420194937: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014292188437588243, 'time_algorithm_update': 0.002027653786073248, 'loss': 0.03881092171089342, 'time_step': 0.0022386226309351175, 'init_value': -4.127072334289551, 'ave_value': -2.282972378933987, 'soft_opc': nan} step=8134




2022-04-20 19:49.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:49.58 [info     ] FQE_20220420194937: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014382098094526543, 'time_algorithm_update': 0.0020662388169621847, 'loss': 0.03974348818462522, 'time_step': 0.0022789440959332936, 'init_value': -4.188939094543457, 'ave_value': -2.298030166007444, 'soft_opc': nan} step=8300




2022-04-20 19:49.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194937/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 19:49.58 [debug    ] RoundIterator is selected.
2022-04-20 19:49.58 [info     ] Directory is created at d3rlpy_logs/FQE_20220420194958
2022-04-20 19:49.58 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:49.58 [debug    ] Building models...
2022-04-20 19:49.58 [debug    ] Models have been built.
2022-04-20 19:49.58 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420194958/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:49.59 [info     ] FQE_20220420194958: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015353186185969862, 'time_algorithm_update': 0.0020171251407889433, 'loss': 0.022762653585739954, 'time_step': 0.0022404644378395968, 'init_value': -1.1281737089157104, 'ave_value': -1.1481221810520232, 'soft_opc': nan} step=344




2022-04-20 19:49.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.00 [info     ] FQE_20220420194958: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015461791393368742, 'time_algorithm_update': 0.0020559762799462608, 'loss': 0.0209716311276912, 'time_step': 0.0022818390713181605, 'init_value': -1.932733178138733, 'ave_value': -1.9601614123529143, 'soft_opc': nan} step=688




2022-04-20 19:50.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.01 [info     ] FQE_20220420194958: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001534071079520292, 'time_algorithm_update': 0.0020398948081704073, 'loss': 0.023744864516545002, 'time_step': 0.0022619720115218053, 'init_value': -2.890125036239624, 'ave_value': -2.9495489704984803, 'soft_opc': nan} step=1032




2022-04-20 19:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.02 [info     ] FQE_20220420194958: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015545168588327807, 'time_algorithm_update': 0.002052302970442661, 'loss': 0.02613416282990731, 'time_step': 0.002274852852488673, 'init_value': -3.6338651180267334, 'ave_value': -3.695048528493525, 'soft_opc': nan} step=1376




2022-04-20 19:50.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.03 [info     ] FQE_20220420194958: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015276878379112067, 'time_algorithm_update': 0.002029463995334714, 'loss': 0.03234396837814169, 'time_step': 0.0022517026856888173, 'init_value': -4.663681983947754, 'ave_value': -4.721709638055381, 'soft_opc': nan} step=1720




2022-04-20 19:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.04 [info     ] FQE_20220420194958: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015260244524756142, 'time_algorithm_update': 0.0020159004732619883, 'loss': 0.03944736718048537, 'time_step': 0.0022405711717383807, 'init_value': -5.365715026855469, 'ave_value': -5.385046688930409, 'soft_opc': nan} step=2064




2022-04-20 19:50.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.04 [info     ] FQE_20220420194958: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.000153163144754809, 'time_algorithm_update': 0.002020700033320937, 'loss': 0.04830566495061354, 'time_step': 0.0022435257601183516, 'init_value': -6.379848480224609, 'ave_value': -6.345501528720598, 'soft_opc': nan} step=2408




2022-04-20 19:50.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.05 [info     ] FQE_20220420194958: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001547073209008505, 'time_algorithm_update': 0.002035542976024539, 'loss': 0.0616762021551082, 'time_step': 0.002259222574012224, 'init_value': -6.984360218048096, 'ave_value': -6.850286019492794, 'soft_opc': nan} step=2752




2022-04-20 19:50.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.06 [info     ] FQE_20220420194958: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001546511816423993, 'time_algorithm_update': 0.0020646543003791985, 'loss': 0.07443436772053585, 'time_step': 0.0022894788620083832, 'init_value': -7.582277297973633, 'ave_value': -7.402442682648564, 'soft_opc': nan} step=3096




2022-04-20 19:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.07 [info     ] FQE_20220420194958: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001524063043816145, 'time_algorithm_update': 0.0020142017408858897, 'loss': 0.09209658647616675, 'time_step': 0.0022357002247211546, 'init_value': -8.410165786743164, 'ave_value': -8.112133807454024, 'soft_opc': nan} step=3440




2022-04-20 19:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.08 [info     ] FQE_20220420194958: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015419860218846522, 'time_algorithm_update': 0.0020857966223428418, 'loss': 0.10662207193672657, 'time_step': 0.0023120676362237266, 'init_value': -9.0355863571167, 'ave_value': -8.617917212629104, 'soft_opc': nan} step=3784




2022-04-20 19:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.09 [info     ] FQE_20220420194958: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015177005945250045, 'time_algorithm_update': 0.002033299484918284, 'loss': 0.12496292270817469, 'time_step': 0.0022511877292810483, 'init_value': -9.75764274597168, 'ave_value': -9.303268144930804, 'soft_opc': nan} step=4128




2022-04-20 19:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.10 [info     ] FQE_20220420194958: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001527375953142033, 'time_algorithm_update': 0.0020477723243624664, 'loss': 0.14075400090917164, 'time_step': 0.0022688903087793393, 'init_value': -10.139805793762207, 'ave_value': -9.677760431331556, 'soft_opc': nan} step=4472




2022-04-20 19:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.10 [info     ] FQE_20220420194958: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001547073209008505, 'time_algorithm_update': 0.0020740683688673864, 'loss': 0.16017636360578932, 'time_step': 0.0022996865039648013, 'init_value': -10.73607063293457, 'ave_value': -10.273363443564724, 'soft_opc': nan} step=4816




2022-04-20 19:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.11 [info     ] FQE_20220420194958: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015536990276602812, 'time_algorithm_update': 0.00205699787583462, 'loss': 0.1791489779743431, 'time_step': 0.0022848671258882034, 'init_value': -11.214296340942383, 'ave_value': -10.86498856332388, 'soft_opc': nan} step=5160




2022-04-20 19:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.12 [info     ] FQE_20220420194958: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001558730768602948, 'time_algorithm_update': 0.0020689236563305523, 'loss': 0.19744544824162974, 'time_step': 0.002295452494953954, 'init_value': -11.779611587524414, 'ave_value': -11.577799918549555, 'soft_opc': nan} step=5504




2022-04-20 19:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.13 [info     ] FQE_20220420194958: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001567338788232138, 'time_algorithm_update': 0.002114916956701944, 'loss': 0.21767112695297963, 'time_step': 0.0023411096528519033, 'init_value': -11.792165756225586, 'ave_value': -11.685976370041434, 'soft_opc': nan} step=5848




2022-04-20 19:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.14 [info     ] FQE_20220420194958: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015110886374185251, 'time_algorithm_update': 0.0020556671674861467, 'loss': 0.23178411985552588, 'time_step': 0.002277789420859758, 'init_value': -11.946462631225586, 'ave_value': -11.994106125992698, 'soft_opc': nan} step=6192




2022-04-20 19:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.15 [info     ] FQE_20220420194958: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015026469563328944, 'time_algorithm_update': 0.002035314260527145, 'loss': 0.25180707684837195, 'time_step': 0.002256997795992119, 'init_value': -12.342266082763672, 'ave_value': -12.414497911446803, 'soft_opc': nan} step=6536




2022-04-20 19:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.16 [info     ] FQE_20220420194958: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015429840531460074, 'time_algorithm_update': 0.0020643645940824997, 'loss': 0.2726426949767872, 'time_step': 0.002289997976879741, 'init_value': -12.359037399291992, 'ave_value': -12.577292523727762, 'soft_opc': nan} step=6880




2022-04-20 19:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.16 [info     ] FQE_20220420194958: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015395048052765602, 'time_algorithm_update': 0.0020244052243787187, 'loss': 0.29055045033415217, 'time_step': 0.002248831266580626, 'init_value': -12.673595428466797, 'ave_value': -13.033037779943363, 'soft_opc': nan} step=7224




2022-04-20 19:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.17 [info     ] FQE_20220420194958: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015409671983053518, 'time_algorithm_update': 0.0020533619925033214, 'loss': 0.3073133795608788, 'time_step': 0.002277307732160701, 'init_value': -12.760416030883789, 'ave_value': -13.237547910052378, 'soft_opc': nan} step=7568




2022-04-20 19:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.18 [info     ] FQE_20220420194958: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015469969705093737, 'time_algorithm_update': 0.002042095328486243, 'loss': 0.3335901981836921, 'time_step': 0.0022691509058309156, 'init_value': -13.14504623413086, 'ave_value': -13.56444131275555, 'soft_opc': nan} step=7912




2022-04-20 19:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.19 [info     ] FQE_20220420194958: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015443424845850744, 'time_algorithm_update': 0.0021010602629461953, 'loss': 0.36483748114754466, 'time_step': 0.0023257212583408797, 'init_value': -13.871898651123047, 'ave_value': -14.375285106628866, 'soft_opc': nan} step=8256




2022-04-20 19:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.20 [info     ] FQE_20220420194958: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001541473144708678, 'time_algorithm_update': 0.00205610103385393, 'loss': 0.39344337753150177, 'time_step': 0.0022818148136138916, 'init_value': -13.688053131103516, 'ave_value': -14.397141116159457, 'soft_opc': nan} step=8600




2022-04-20 19:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.21 [info     ] FQE_20220420194958: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001529351223346799, 'time_algorithm_update': 0.002010048128837763, 'loss': 0.41849513936159743, 'time_step': 0.0022340881270031597, 'init_value': -14.102127075195312, 'ave_value': -14.938528890491606, 'soft_opc': nan} step=8944




2022-04-20 19:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.22 [info     ] FQE_20220420194958: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015332255252571993, 'time_algorithm_update': 0.0020380283510962197, 'loss': 0.44570683323518306, 'time_step': 0.002262678257254667, 'init_value': -14.309232711791992, 'ave_value': -15.21486479387627, 'soft_opc': nan} step=9288




2022-04-20 19:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.22 [info     ] FQE_20220420194958: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015650031178496605, 'time_algorithm_update': 0.0020622368468794713, 'loss': 0.46490035721014233, 'time_step': 0.002288120430569316, 'init_value': -14.623453140258789, 'ave_value': -15.617557104965588, 'soft_opc': nan} step=9632




2022-04-20 19:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.23 [info     ] FQE_20220420194958: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001534438410470652, 'time_algorithm_update': 0.002026444950769114, 'loss': 0.4960622600398871, 'time_step': 0.002249913160191026, 'init_value': -14.808637619018555, 'ave_value': -15.933076528547046, 'soft_opc': nan} step=9976




2022-04-20 19:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.24 [info     ] FQE_20220420194958: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015717398288638094, 'time_algorithm_update': 0.0020835115466006967, 'loss': 0.5328879508752983, 'time_step': 0.0023123989271563155, 'init_value': -15.255160331726074, 'ave_value': -16.365357296853453, 'soft_opc': nan} step=10320




2022-04-20 19:50.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.25 [info     ] FQE_20220420194958: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015430048454639523, 'time_algorithm_update': 0.0020621827868528143, 'loss': 0.5483708277312216, 'time_step': 0.0022887753885845806, 'init_value': -14.896208763122559, 'ave_value': -16.179611400441008, 'soft_opc': nan} step=10664




2022-04-20 19:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.26 [info     ] FQE_20220420194958: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015474959861400516, 'time_algorithm_update': 0.0020351728727651197, 'loss': 0.5652186959158889, 'time_step': 0.0022611063580180325, 'init_value': -15.111209869384766, 'ave_value': -16.437612838626983, 'soft_opc': nan} step=11008




2022-04-20 19:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.27 [info     ] FQE_20220420194958: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015735695528429607, 'time_algorithm_update': 0.002044873182163682, 'loss': 0.5816212804760611, 'time_step': 0.002273768186569214, 'init_value': -15.114234924316406, 'ave_value': -16.43107886743975, 'soft_opc': nan} step=11352




2022-04-20 19:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.28 [info     ] FQE_20220420194958: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015826280726942906, 'time_algorithm_update': 0.0020297038000683452, 'loss': 0.6130857881386006, 'time_step': 0.002258120581161144, 'init_value': -15.71265983581543, 'ave_value': -17.04134704433046, 'soft_opc': nan} step=11696




2022-04-20 19:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.28 [info     ] FQE_20220420194958: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015114421068235885, 'time_algorithm_update': 0.0020462586436160776, 'loss': 0.631650390060142, 'time_step': 0.0022678908913634542, 'init_value': -15.428430557250977, 'ave_value': -16.876199081674354, 'soft_opc': nan} step=12040




2022-04-20 19:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.29 [info     ] FQE_20220420194958: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015231273895086243, 'time_algorithm_update': 0.0020188813985780227, 'loss': 0.6511206822578124, 'time_step': 0.002242542976556822, 'init_value': -15.818934440612793, 'ave_value': -17.270228901102737, 'soft_opc': nan} step=12384




2022-04-20 19:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.30 [info     ] FQE_20220420194958: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015314304551412893, 'time_algorithm_update': 0.002065734807835069, 'loss': 0.6806948214639411, 'time_step': 0.0022887594478074895, 'init_value': -16.014196395874023, 'ave_value': -17.50150750241838, 'soft_opc': nan} step=12728




2022-04-20 19:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.31 [info     ] FQE_20220420194958: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015150045239648155, 'time_algorithm_update': 0.0020533536755761436, 'loss': 0.6775231965936633, 'time_step': 0.0022752915703973106, 'init_value': -16.196041107177734, 'ave_value': -17.681104449968082, 'soft_opc': nan} step=13072




2022-04-20 19:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.32 [info     ] FQE_20220420194958: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015538445738858954, 'time_algorithm_update': 0.0020463806252146877, 'loss': 0.7014225772690288, 'time_step': 0.002272779858389566, 'init_value': -16.06463050842285, 'ave_value': -17.76202179902309, 'soft_opc': nan} step=13416




2022-04-20 19:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.33 [info     ] FQE_20220420194958: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015455553697985271, 'time_algorithm_update': 0.00201358767442925, 'loss': 0.7263357014839299, 'time_step': 0.0022387275862139327, 'init_value': -16.364654541015625, 'ave_value': -17.96838533432634, 'soft_opc': nan} step=13760




2022-04-20 19:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.34 [info     ] FQE_20220420194958: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015508019646932913, 'time_algorithm_update': 0.00202274946279304, 'loss': 0.7491067336374071, 'time_step': 0.0022504246512124707, 'init_value': -16.446006774902344, 'ave_value': -18.067663673586672, 'soft_opc': nan} step=14104




2022-04-20 19:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.34 [info     ] FQE_20220420194958: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015257125677064408, 'time_algorithm_update': 0.0020381066688271456, 'loss': 0.7637808098576876, 'time_step': 0.0022588649461435717, 'init_value': -17.05059051513672, 'ave_value': -18.644291325732393, 'soft_opc': nan} step=14448




2022-04-20 19:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.35 [info     ] FQE_20220420194958: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015691615814386413, 'time_algorithm_update': 0.0020584290803864944, 'loss': 0.7943990463715827, 'time_step': 0.0022869324961373974, 'init_value': -17.215526580810547, 'ave_value': -18.756636358273997, 'soft_opc': nan} step=14792




2022-04-20 19:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.36 [info     ] FQE_20220420194958: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.000155399012011151, 'time_algorithm_update': 0.0020852296851402107, 'loss': 0.8146710161249652, 'time_step': 0.0023138349832490432, 'init_value': -17.186689376831055, 'ave_value': -18.87127935558826, 'soft_opc': nan} step=15136




2022-04-20 19:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.37 [info     ] FQE_20220420194958: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001542672168376834, 'time_algorithm_update': 0.0020846218563789543, 'loss': 0.8363665559134165, 'time_step': 0.0023092045340427133, 'init_value': -17.397207260131836, 'ave_value': -18.992380238787547, 'soft_opc': nan} step=15480




2022-04-20 19:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.38 [info     ] FQE_20220420194958: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001555909944135089, 'time_algorithm_update': 0.0020443998103918033, 'loss': 0.8594045081814794, 'time_step': 0.002273301745569983, 'init_value': -17.883453369140625, 'ave_value': -19.520169508510882, 'soft_opc': nan} step=15824




2022-04-20 19:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.39 [info     ] FQE_20220420194958: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015375641889350358, 'time_algorithm_update': 0.002034339793892794, 'loss': 0.874310809999791, 'time_step': 0.00225927940634794, 'init_value': -18.087596893310547, 'ave_value': -19.68398377750371, 'soft_opc': nan} step=16168




2022-04-20 19:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.40 [info     ] FQE_20220420194958: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015366631884907567, 'time_algorithm_update': 0.0020378432994665103, 'loss': 0.8944730942603201, 'time_step': 0.002260582391605821, 'init_value': -17.461917877197266, 'ave_value': -19.17799113043794, 'soft_opc': nan} step=16512




2022-04-20 19:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.40 [info     ] FQE_20220420194958: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015849498815314713, 'time_algorithm_update': 0.0020676359187724977, 'loss': 0.8987192895674948, 'time_step': 0.0022959639859753984, 'init_value': -18.238632202148438, 'ave_value': -19.807325374757923, 'soft_opc': nan} step=16856




2022-04-20 19:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 19:50.41 [info     ] FQE_20220420194958: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015694388123445733, 'time_algorithm_update': 0.0020654173784477765, 'loss': 0.9221604343850253, 'time_step': 0.0022938230703043383, 'init_value': -18.18419075012207, 'ave_value': -19.956386372769202, 'soft_opc': nan} step=17200




2022-04-20 19:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420194958/model_17200.pt
search iteration:  31
using hyper params:  [0.0010795492153408616, 0.005042980858030678, 5.345239143589602e-05, 7]
2022-04-20 19:50.41 [debug    ] RoundIterator is selected.
2022-04-20 19:50.41 [info     ] Directory is created at d3rlpy_logs/CQL_20220420195041
2022-04-20 19:50.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:50.41 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:50.41 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420195041/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0010795492153408616, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:50.48 [info     ] CQL_20220420195041: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003798635382401316, 'time_algorithm_update': 0.01785649333083839, 'temp_loss': 4.515967501534356, 'temp': 0.9910769692638464, 'alpha_loss': -18.004354658182603, 'alpha': 1.0171847587440446, 'critic_loss': 54.240564441123205, 'actor_loss': 4.930232222491537, 'time_step': 0.01833921636057179, 'td_error': 4.773280774641118, 'init_value': -10.925324440002441, 'ave_value': -7.6300191403784465} step=342
2022-04-20 19:50.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:50.54 [info     ] CQL_20220420195041: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00037596100255062704, 'time_algorithm_update': 0.017871094725982486, 'temp_loss': 4.240868754554213, 'temp': 0.9734323565025775, 'alpha_loss': -8.344819407714041, 'alpha': 1.0429901519016913, 'critic_loss': 29.334753025344938, 'actor_loss': 11.819385511833325, 'time_step': 0.018349077966478135, 'td_error': 5.965729344825002, 'init_value': -22.54681396484375, 'ave_value': -14.162003672175647} step=684
2022-04-20 19:50.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.01 [info     ] CQL_20220420195041: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003733614034820021, 'time_algorithm_update': 0.01782598620966861, 'temp_loss': 3.5348551447628536, 'temp': 0.9576252149908167, 'alpha_loss': -4.546618527139139, 'alpha': 1.0596367312453643, 'critic_loss': 43.529197341517396, 'actor_loss': 19.684820164016813, 'time_step': 0.018299462502462824, 'td_error': 8.57672406594574, 'init_value': -33.368804931640625, 'ave_value': -20.506886913881107} step=1026
2022-04-20 19:51.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.07 [info     ] CQL_20220420195041: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00037144638641535885, 'time_algorithm_update': 0.01794810880694473, 'temp_loss': 3.0091769374602024, 'temp': 0.943392325737323, 'alpha_loss': -2.0178799389382247, 'alpha': 1.0710234262092768, 'critic_loss': 63.61117989835683, 'actor_loss': 27.105660934894406, 'time_step': 0.018417180630198697, 'td_error': 13.113745617414049, 'init_value': -44.63855743408203, 'ave_value': -27.253720698771446} step=1368
2022-04-20 19:51.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.14 [info     ] CQL_20220420195041: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003753419508013809, 'time_algorithm_update': 0.018016794968766774, 'temp_loss': 2.5989990359858464, 'temp': 0.9301956654989232, 'alpha_loss': 0.2161791181329322, 'alpha': 1.074876522460179, 'critic_loss': 87.0028260883532, 'actor_loss': 33.9683002226534, 'time_step': 0.018496868206046478, 'td_error': 18.049069053248747, 'init_value': -53.4741096496582, 'ave_value': -32.00516997368218} step=1710
2022-04-20 19:51.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.21 [info     ] CQL_20220420195041: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003752513238561084, 'time_algorithm_update': 0.018008806552106175, 'temp_loss': 2.2344551232823155, 'temp': 0.9178494401493965, 'alpha_loss': 2.114522365664611, 'alpha': 1.0681415036407829, 'critic_loss': 112.03675576817919, 'actor_loss': 40.12008856611642, 'time_step': 0.018483774006715296, 'td_error': 21.664247105627513, 'init_value': -60.9747428894043, 'ave_value': -36.62413768361012} step=2052
2022-04-20 19:51.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.27 [info     ] CQL_20220420195041: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003686822645845469, 'time_algorithm_update': 0.017788618628741704, 'temp_loss': 1.9235388730701648, 'temp': 0.9063037604267834, 'alpha_loss': 3.7383307469518563, 'alpha': 1.0489426969784743, 'critic_loss': 140.57084704839696, 'actor_loss': 45.777925368638066, 'time_step': 0.018253433774089254, 'td_error': 27.295421098241864, 'init_value': -71.46492004394531, 'ave_value': -43.21559820573072} step=2394
2022-04-20 19:51.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.34 [info     ] CQL_20220420195041: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00037299262152777775, 'time_algorithm_update': 0.017949356670268098, 'temp_loss': 1.6508516027913456, 'temp': 0.8953367611121016, 'alpha_loss': 4.967714147261011, 'alpha': 1.019189721311045, 'critic_loss': 168.3242028888903, 'actor_loss': 50.88224282738758, 'time_step': 0.01841880424677977, 'td_error': 31.3275335234813, 'init_value': -76.5434341430664, 'ave_value': -46.06998803145848} step=2736
2022-04-20 19:51.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.40 [info     ] CQL_20220420195041: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003683030256750988, 'time_algorithm_update': 0.0178252939592328, 'temp_loss': 1.4321281413237255, 'temp': 0.8850285069287172, 'alpha_loss': 5.88711018520489, 'alpha': 0.9829096421163682, 'critic_loss': 196.4733527111031, 'actor_loss': 55.53987670363041, 'time_step': 0.018295815116480776, 'td_error': 36.512378646859055, 'init_value': -84.18839263916016, 'ave_value': -50.64421107686049} step=3078
2022-04-20 19:51.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.47 [info     ] CQL_20220420195041: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00037912458007098636, 'time_algorithm_update': 0.01792106921212715, 'temp_loss': 1.2616464575828865, 'temp': 0.8750272379284016, 'alpha_loss': 6.5764336348974215, 'alpha': 0.9451350482583744, 'critic_loss': 222.85924592492177, 'actor_loss': 59.686310171383866, 'time_step': 0.01840125468739292, 'td_error': 39.15695929662978, 'init_value': -90.45808410644531, 'ave_value': -54.22088528119766} step=3420
2022-04-20 19:51.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:51.53 [info     ] CQL_20220420195041: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00038097197549384935, 'time_algorithm_update': 0.01802560948489005, 'temp_loss': 1.107837725103947, 'temp': 0.8654210285136574, 'alpha_loss': 7.139881856260244, 'alpha': 0.907684318154876, 'critic_loss': 248.45229906227158, 'actor_loss': 63.54644506577163, 'time_step': 0.018507189220852323, 'td_error': 42.285883071246126, 'init_value': -93.84369659423828, 'ave_value': -57.122345298869654} step=3762
2022-04-20 19:51.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.00 [info     ] CQL_20220420195041: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00037398742653473076, 'time_algorithm_update': 0.01802033151102345, 'temp_loss': 0.9650962345781382, 'temp': 0.8559963995950264, 'alpha_loss': 7.615350929617184, 'alpha': 0.8721337515359734, 'critic_loss': 271.2942155983016, 'actor_loss': 66.93073350783678, 'time_step': 0.018496694620589765, 'td_error': 45.55671590545797, 'init_value': -99.18862915039062, 'ave_value': -60.00019705106009} step=4104
2022-04-20 19:52.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.06 [info     ] CQL_20220420195041: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.000369030829758672, 'time_algorithm_update': 0.017830701599344176, 'temp_loss': 0.8154963477574594, 'temp': 0.8470404179472673, 'alpha_loss': 7.925586178985952, 'alpha': 0.8382989717157263, 'critic_loss': 294.6180084406981, 'actor_loss': 70.15835416247273, 'time_step': 0.018301141889471757, 'td_error': 49.247368794507246, 'init_value': -105.31013488769531, 'ave_value': -64.34548567988449} step=4446
2022-04-20 19:52.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.13 [info     ] CQL_20220420195041: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003727967278999195, 'time_algorithm_update': 0.017877765566284894, 'temp_loss': 0.6986491325480199, 'temp': 0.8387525653281407, 'alpha_loss': 8.00981212638275, 'alpha': 0.8067620312958433, 'critic_loss': 316.9987018429048, 'actor_loss': 72.82214329814353, 'time_step': 0.018348667356703017, 'td_error': 50.51561946210953, 'init_value': -108.72077941894531, 'ave_value': -65.56602193839983} step=4788
2022-04-20 19:52.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.19 [info     ] CQL_20220420195041: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00037265381617852816, 'time_algorithm_update': 0.017680802540472378, 'temp_loss': 0.5757832259310094, 'temp': 0.8307487096702844, 'alpha_loss': 8.00852122083742, 'alpha': 0.7777433552240071, 'critic_loss': 335.9452053315458, 'actor_loss': 75.3373384196856, 'time_step': 0.01815528618661981, 'td_error': 52.13726851450129, 'init_value': -111.55348205566406, 'ave_value': -67.4138805340123} step=5130
2022-04-20 19:52.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.26 [info     ] CQL_20220420195041: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00037391422784816453, 'time_algorithm_update': 0.017781956154003478, 'temp_loss': 0.4674551085683337, 'temp': 0.8235843082269033, 'alpha_loss': 8.02255852320041, 'alpha': 0.7501894286152913, 'critic_loss': 352.8368760493764, 'actor_loss': 77.43737612272564, 'time_step': 0.01825420410312407, 'td_error': 57.303240481323165, 'init_value': -114.09709167480469, 'ave_value': -69.42713102988698} step=5472
2022-04-20 19:52.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.32 [info     ] CQL_20220420195041: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00037949057350381775, 'time_algorithm_update': 0.017685607859962864, 'temp_loss': 0.3693160843430904, 'temp': 0.816912875886549, 'alpha_loss': 7.9260146185668585, 'alpha': 0.7242700778252897, 'critic_loss': 368.2507637425473, 'actor_loss': 79.3397769593356, 'time_step': 0.018165079473752028, 'td_error': 53.72776954249127, 'init_value': -114.85888671875, 'ave_value': -69.9867933114853} step=5814
2022-04-20 19:52.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.38 [info     ] CQL_20220420195041: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003490113375479715, 'time_algorithm_update': 0.016308218415020503, 'temp_loss': 0.2913048212138707, 'temp': 0.8110859803637566, 'alpha_loss': 7.861924045964291, 'alpha': 0.6997567568606103, 'critic_loss': 381.0627071982936, 'actor_loss': 80.86535988077085, 'time_step': 0.01674783229827881, 'td_error': 55.34944699548646, 'init_value': -119.1354751586914, 'ave_value': -72.8807256554067} step=6156
2022-04-20 19:52.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.44 [info     ] CQL_20220420195041: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003759777336789851, 'time_algorithm_update': 0.01749057239956326, 'temp_loss': 0.2305992168826405, 'temp': 0.8058563761892374, 'alpha_loss': 7.790688488218519, 'alpha': 0.6763436707139713, 'critic_loss': 393.00023396809894, 'actor_loss': 82.4038512692814, 'time_step': 0.017968828915155423, 'td_error': 55.42677421038207, 'init_value': -120.8111343383789, 'ave_value': -74.33448431142949} step=6498
2022-04-20 19:52.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.51 [info     ] CQL_20220420195041: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003702173456113938, 'time_algorithm_update': 0.017660772591306453, 'temp_loss': 0.16516651668482357, 'temp': 0.801419096383435, 'alpha_loss': 7.663305073453669, 'alpha': 0.653970008530812, 'critic_loss': 401.6765111733598, 'actor_loss': 83.55939733494095, 'time_step': 0.01812911939899824, 'td_error': 54.2642287038163, 'init_value': -117.9761734008789, 'ave_value': -72.57135146509286} step=6840
2022-04-20 19:52.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:52.57 [info     ] CQL_20220420195041: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00037354056598150245, 'time_algorithm_update': 0.01753734426888806, 'temp_loss': 0.12068263287737704, 'temp': 0.797690413674416, 'alpha_loss': 7.5472538750074065, 'alpha': 0.632547988877659, 'critic_loss': 409.19955274793836, 'actor_loss': 84.62894504390962, 'time_step': 0.018010879817761873, 'td_error': 55.087263040860606, 'init_value': -125.063720703125, 'ave_value': -77.24824217407284} step=7182
2022-04-20 19:52.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.04 [info     ] CQL_20220420195041: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003648619902761359, 'time_algorithm_update': 0.017640389894184313, 'temp_loss': 0.06617307115062984, 'temp': 0.7948433660972886, 'alpha_loss': 7.390284403025755, 'alpha': 0.6120572696652329, 'critic_loss': 417.0773179796007, 'actor_loss': 85.76209259033203, 'time_step': 0.018104224177131877, 'td_error': 53.09063342719033, 'init_value': -123.65986633300781, 'ave_value': -75.9448249838414} step=7524
2022-04-20 19:53.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.10 [info     ] CQL_20220420195041: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003715356190999349, 'time_algorithm_update': 0.01751416956472118, 'temp_loss': 0.023346197475510384, 'temp': 0.7932467096381717, 'alpha_loss': 7.341600482226812, 'alpha': 0.5923486968927216, 'critic_loss': 424.5029905441909, 'actor_loss': 86.69099105031867, 'time_step': 0.017985049744098508, 'td_error': 53.52612946488897, 'init_value': -122.85697174072266, 'ave_value': -77.16942453491527} step=7866
2022-04-20 19:53.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.17 [info     ] CQL_20220420195041: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00037384660620438427, 'time_algorithm_update': 0.01760494499875788, 'temp_loss': 0.01159083738117016, 'temp': 0.7929069253436306, 'alpha_loss': 7.181837809713263, 'alpha': 0.5731908691208265, 'critic_loss': 431.4302254838553, 'actor_loss': 87.63130696614583, 'time_step': 0.01807917837511029, 'td_error': 52.41632333036896, 'init_value': -125.276611328125, 'ave_value': -78.6301439646994} step=8208
2022-04-20 19:53.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.23 [info     ] CQL_20220420195041: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003465748669808371, 'time_algorithm_update': 0.01674101018069083, 'temp_loss': 0.0020597861611355117, 'temp': 0.7926660683768535, 'alpha_loss': 7.007112610409831, 'alpha': 0.5550114364303343, 'critic_loss': 439.12245695772225, 'actor_loss': 88.35253747861985, 'time_step': 0.017178893089294434, 'td_error': 54.756336673379536, 'init_value': -127.45002746582031, 'ave_value': -78.58420244010719} step=8550
2022-04-20 19:53.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.29 [info     ] CQL_20220420195041: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00036822146142435354, 'time_algorithm_update': 0.017470139509056046, 'temp_loss': -0.008267024194287974, 'temp': 0.7923236285385332, 'alpha_loss': 6.881142248187149, 'alpha': 0.5374118940174928, 'critic_loss': 443.4644828037909, 'actor_loss': 88.98408124600238, 'time_step': 0.01793648346125731, 'td_error': 54.73184479242687, 'init_value': -128.9930419921875, 'ave_value': -79.57313759713156} step=8892
2022-04-20 19:53.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.35 [info     ] CQL_20220420195041: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00037185839045117473, 'time_algorithm_update': 0.017701880276551722, 'temp_loss': -0.03504008619245468, 'temp': 0.7936011648666091, 'alpha_loss': 6.7318733449567825, 'alpha': 0.520354467874382, 'critic_loss': 446.6809457700852, 'actor_loss': 89.48490704988178, 'time_step': 0.018172953561035512, 'td_error': 50.317513110216694, 'init_value': -125.68846130371094, 'ave_value': -77.92108458748541} step=9234
2022-04-20 19:53.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.42 [info     ] CQL_20220420195041: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003695718029089141, 'time_algorithm_update': 0.017635123074403285, 'temp_loss': -0.05596516898435633, 'temp': 0.7961346554128748, 'alpha_loss': 6.565831933105201, 'alpha': 0.5039640896850162, 'critic_loss': 450.9645170691418, 'actor_loss': 89.91973172014917, 'time_step': 0.018106164290891055, 'td_error': 55.55988567047522, 'init_value': -126.0142593383789, 'ave_value': -78.839322898694} step=9576
2022-04-20 19:53.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.48 [info     ] CQL_20220420195041: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003671855257268538, 'time_algorithm_update': 0.017601444707279316, 'temp_loss': -0.040753265799224725, 'temp': 0.7985292070093211, 'alpha_loss': 6.378520388352244, 'alpha': 0.48821352953799285, 'critic_loss': 454.6628528617279, 'actor_loss': 90.4638077362239, 'time_step': 0.018069256815993993, 'td_error': 53.729631572352986, 'init_value': -130.89706420898438, 'ave_value': -81.87889431866708} step=9918
2022-04-20 19:53.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:53.55 [info     ] CQL_20220420195041: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00036985971774274143, 'time_algorithm_update': 0.017716647588718704, 'temp_loss': -0.019702141331127514, 'temp': 0.8006026094768479, 'alpha_loss': 6.205324688850091, 'alpha': 0.47298482984130147, 'critic_loss': 458.40943864075064, 'actor_loss': 90.96764099388791, 'time_step': 0.018186139781572665, 'td_error': 52.379418632911126, 'init_value': -129.25759887695312, 'ave_value': -81.84394683640753} step=10260
2022-04-20 19:53.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.01 [info     ] CQL_20220420195041: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003751321145665576, 'time_algorithm_update': 0.017800609967861956, 'temp_loss': -0.013064994705481487, 'temp': 0.8013457096459573, 'alpha_loss': 5.942435535771108, 'alpha': 0.4584909937186548, 'critic_loss': 460.88591949284427, 'actor_loss': 91.27559182239555, 'time_step': 0.018276506000094943, 'td_error': 52.5253724046183, 'init_value': -131.6478271484375, 'ave_value': -82.29624518284658} step=10602
2022-04-20 19:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.08 [info     ] CQL_20220420195041: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00038173672748587984, 'time_algorithm_update': 0.01776915405228821, 'temp_loss': -0.030674220520284092, 'temp': 0.8024901986470696, 'alpha_loss': 5.78557805011147, 'alpha': 0.4445482495932551, 'critic_loss': 461.9042718006156, 'actor_loss': 91.4838087963082, 'time_step': 0.018250047114857455, 'td_error': 51.44876326147368, 'init_value': -128.67042541503906, 'ave_value': -81.20696657708434} step=10944
2022-04-20 19:54.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.14 [info     ] CQL_20220420195041: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003748588394700435, 'time_algorithm_update': 0.017777618608976666, 'temp_loss': -0.031996347222542555, 'temp': 0.8042592826293923, 'alpha_loss': 5.63713726802179, 'alpha': 0.4309335287377151, 'critic_loss': 461.72545164230974, 'actor_loss': 91.77834614536218, 'time_step': 0.018250464695936056, 'td_error': 53.33872089635255, 'init_value': -131.92843627929688, 'ave_value': -82.00065796644637} step=11286
2022-04-20 19:54.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.21 [info     ] CQL_20220420195041: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.000372814853288974, 'time_algorithm_update': 0.017792360127320765, 'temp_loss': -0.013063951019655195, 'temp': 0.805142619456464, 'alpha_loss': 5.343608209264208, 'alpha': 0.4179934400041201, 'critic_loss': 462.12095240542766, 'actor_loss': 91.94686610796298, 'time_step': 0.018265155323764735, 'td_error': 53.58587834558359, 'init_value': -130.0972442626953, 'ave_value': -82.6701668488288} step=11628
2022-04-20 19:54.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.27 [info     ] CQL_20220420195041: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003715147051894874, 'time_algorithm_update': 0.017750424948352123, 'temp_loss': 0.007733223104729638, 'temp': 0.8055050034969173, 'alpha_loss': 5.287420729447526, 'alpha': 0.40530922651639456, 'critic_loss': 460.27929143180626, 'actor_loss': 92.1435365955732, 'time_step': 0.018221091805842884, 'td_error': 52.113208800458146, 'init_value': -131.0598602294922, 'ave_value': -82.09997359809239} step=11970
2022-04-20 19:54.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.34 [info     ] CQL_20220420195041: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00037873906698840403, 'time_algorithm_update': 0.017756324065359014, 'temp_loss': -0.002468735306409367, 'temp': 0.8053253981453633, 'alpha_loss': 5.010417527622646, 'alpha': 0.39304661489369574, 'critic_loss': 460.0493682504397, 'actor_loss': 92.1925690411127, 'time_step': 0.018234198553520337, 'td_error': 48.30536870045503, 'init_value': -126.19998931884766, 'ave_value': -82.95558359458103} step=12312
2022-04-20 19:54.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.40 [info     ] CQL_20220420195041: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00036705376809103446, 'time_algorithm_update': 0.017692248723660296, 'temp_loss': 0.02776363284234502, 'temp': 0.804768430036411, 'alpha_loss': 4.867916989047625, 'alpha': 0.3812897803142057, 'critic_loss': 460.16172397903534, 'actor_loss': 92.31976543671904, 'time_step': 0.018159411804020753, 'td_error': 50.51337185023046, 'init_value': -127.64717102050781, 'ave_value': -82.63106885675911} step=12654
2022-04-20 19:54.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.47 [info     ] CQL_20220420195041: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003739469929745323, 'time_algorithm_update': 0.017685221649749935, 'temp_loss': 0.034658352000235815, 'temp': 0.8023816169353953, 'alpha_loss': 4.65240676709783, 'alpha': 0.3699157664127517, 'critic_loss': 460.8305826465986, 'actor_loss': 92.41389184249074, 'time_step': 0.018158563396386933, 'td_error': 51.12333583922665, 'init_value': -129.43495178222656, 'ave_value': -83.24036516683194} step=12996
2022-04-20 19:54.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:54.53 [info     ] CQL_20220420195041: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003732484683655856, 'time_algorithm_update': 0.01778195475974278, 'temp_loss': 0.022821315199311017, 'temp': 0.8012770173842447, 'alpha_loss': 4.49099514219496, 'alpha': 0.3589108572368733, 'critic_loss': 457.8995154308297, 'actor_loss': 92.44655709517629, 'time_step': 0.01825350000147234, 'td_error': 44.10328425110149, 'init_value': -122.09886169433594, 'ave_value': -81.33278840614332} step=13338
2022-04-20 19:54.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.00 [info     ] CQL_20220420195041: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003701253244054248, 'time_algorithm_update': 0.017787909647177535, 'temp_loss': 0.03854008851169843, 'temp': 0.7990874947860227, 'alpha_loss': 4.311033161760074, 'alpha': 0.3483166517744287, 'critic_loss': 457.3326446354738, 'actor_loss': 92.40295885320296, 'time_step': 0.01825816450063248, 'td_error': 49.69591455523839, 'init_value': -126.29731750488281, 'ave_value': -81.79934855194578} step=13680
2022-04-20 19:55.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.06 [info     ] CQL_20220420195041: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037396790688497977, 'time_algorithm_update': 0.01780301506756342, 'temp_loss': 0.014685741435104644, 'temp': 0.7976238610800247, 'alpha_loss': 4.132228510421619, 'alpha': 0.33798614475462174, 'critic_loss': 456.0996464957968, 'actor_loss': 92.45064711989018, 'time_step': 0.018276420950192458, 'td_error': 50.12175792704916, 'init_value': -125.7021713256836, 'ave_value': -83.16762277606774} step=14022
2022-04-20 19:55.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.13 [info     ] CQL_20220420195041: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003748706906859638, 'time_algorithm_update': 0.01790426906786467, 'temp_loss': 0.03888236405600232, 'temp': 0.7967711782943435, 'alpha_loss': 4.026821196427819, 'alpha': 0.32797051730908844, 'critic_loss': 454.81498049016585, 'actor_loss': 92.48932498240332, 'time_step': 0.018377575260853908, 'td_error': 49.500304392574265, 'init_value': -126.7799301147461, 'ave_value': -83.44302657877876} step=14364
2022-04-20 19:55.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.19 [info     ] CQL_20220420195041: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003770840795416581, 'time_algorithm_update': 0.017911916587784973, 'temp_loss': 0.07618861565399065, 'temp': 0.7936936597726498, 'alpha_loss': 3.8380517652857375, 'alpha': 0.3181979086315423, 'critic_loss': 453.30464208056355, 'actor_loss': 92.53289330911915, 'time_step': 0.018389623764662716, 'td_error': 48.3473315184747, 'init_value': -127.24394226074219, 'ave_value': -82.20590659961641} step=14706
2022-04-20 19:55.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.26 [info     ] CQL_20220420195041: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00037389261680736876, 'time_algorithm_update': 0.017877526450575443, 'temp_loss': 0.07902208706963132, 'temp': 0.7889821867496647, 'alpha_loss': 3.7290485476889805, 'alpha': 0.30887183059028717, 'critic_loss': 452.3101441678945, 'actor_loss': 92.39067673264888, 'time_step': 0.01835046177021941, 'td_error': 47.221321256149885, 'init_value': -123.83290100097656, 'ave_value': -81.8709827917821} step=15048
2022-04-20 19:55.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.32 [info     ] CQL_20220420195041: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003744273157844767, 'time_algorithm_update': 0.017882227897644043, 'temp_loss': 0.09034491824912048, 'temp': 0.7843320737456718, 'alpha_loss': 3.577475909601178, 'alpha': 0.2996920160731377, 'critic_loss': 451.35634091025906, 'actor_loss': 92.36090273048445, 'time_step': 0.01835292473173978, 'td_error': 49.05816345864075, 'init_value': -128.35000610351562, 'ave_value': -83.36249333969376} step=15390
2022-04-20 19:55.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.39 [info     ] CQL_20220420195041: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037340253417254886, 'time_algorithm_update': 0.01770008028599254, 'temp_loss': 0.06574748472817112, 'temp': 0.7795699228320205, 'alpha_loss': 3.3848400617900647, 'alpha': 0.2909153702489117, 'critic_loss': 450.55934785541734, 'actor_loss': 92.39241614536932, 'time_step': 0.018172467661182783, 'td_error': 48.153017199449735, 'init_value': -124.98164367675781, 'ave_value': -83.10902419720752} step=15732
2022-04-20 19:55.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.45 [info     ] CQL_20220420195041: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00036732146614476254, 'time_algorithm_update': 0.017773975405776708, 'temp_loss': 0.07219478219464334, 'temp': 0.7753389585785001, 'alpha_loss': 3.3180435045420773, 'alpha': 0.282410365621946, 'critic_loss': 447.81679592913355, 'actor_loss': 92.22888540524488, 'time_step': 0.01824285412392421, 'td_error': 47.246362304076044, 'init_value': -126.73051452636719, 'ave_value': -83.90127035114075} step=16074
2022-04-20 19:55.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.51 [info     ] CQL_20220420195041: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003686892358880294, 'time_algorithm_update': 0.017569242165102597, 'temp_loss': 0.09628095501783299, 'temp': 0.7704722569351308, 'alpha_loss': 3.1468599358497307, 'alpha': 0.2740823295381334, 'critic_loss': 445.9343359874703, 'actor_loss': 92.07980226215564, 'time_step': 0.018039814909996343, 'td_error': 45.56906013405441, 'init_value': -124.97883605957031, 'ave_value': -82.4501524716072} step=16416
2022-04-20 19:55.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:55.58 [info     ] CQL_20220420195041: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003733328211377239, 'time_algorithm_update': 0.017805478726214137, 'temp_loss': 0.0943440601218775, 'temp': 0.7659185453813676, 'alpha_loss': 3.037422962007467, 'alpha': 0.2660817304375576, 'critic_loss': 444.5159171478093, 'actor_loss': 92.05515463310375, 'time_step': 0.018278296927959597, 'td_error': 48.522304625047795, 'init_value': -122.9617919921875, 'ave_value': -82.88653031759367} step=16758
2022-04-20 19:55.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:56.04 [info     ] CQL_20220420195041: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037087055674770423, 'time_algorithm_update': 0.017678628190916184, 'temp_loss': 0.079868889430113, 'temp': 0.7607916520352949, 'alpha_loss': 2.9328735679910896, 'alpha': 0.25825248649942945, 'critic_loss': 442.93494830215184, 'actor_loss': 91.7775151436789, 'time_step': 0.018149005042182073, 'td_error': 46.7309011911356, 'init_value': -123.4647216796875, 'ave_value': -83.57668290262488} step=17100
2022-04-20 19:56.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195041/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:56.05 [info     ] FQE_20220420195605: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015619002192853446, 'time_algorithm_update': 0.0020920210574046673, 'loss': 0.006668243631552231, 'time_step': 0.0023181883685560113, 'init_value': -0.06104554235935211, 'ave_value': 0.012807712252545464, 'soft_opc': nan} step=166




2022-04-20 19:56.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.05 [info     ] FQE_20220420195605: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014935056847262094, 'time_algorithm_update': 0.0020553764090480574, 'loss': 0.005462058216714895, 'time_step': 0.002271668020501194, 'init_value': -0.19518744945526123, 'ave_value': -0.06646260902713481, 'soft_opc': nan} step=332




2022-04-20 19:56.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.06 [info     ] FQE_20220420195605: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014855631862778262, 'time_algorithm_update': 0.002088171889983028, 'loss': 0.004897528692676002, 'time_step': 0.002301601042230445, 'init_value': -0.2826805114746094, 'ave_value': -0.13048127039276936, 'soft_opc': nan} step=498




2022-04-20 19:56.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.06 [info     ] FQE_20220420195605: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001500615154404238, 'time_algorithm_update': 0.0020416386156197055, 'loss': 0.0049177697994353545, 'time_step': 0.0022603934069713913, 'init_value': -0.30994704365730286, 'ave_value': -0.11760245747738392, 'soft_opc': nan} step=664




2022-04-20 19:56.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.07 [info     ] FQE_20220420195605: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014967372618525862, 'time_algorithm_update': 0.002026139971721603, 'loss': 0.004309146492517587, 'time_step': 0.0022380265844873636, 'init_value': -0.3820357024669647, 'ave_value': -0.1607674281062508, 'soft_opc': nan} step=830




2022-04-20 19:56.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.07 [info     ] FQE_20220420195605: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015144893922001482, 'time_algorithm_update': 0.0020717928208500505, 'loss': 0.0039948261650110855, 'time_step': 0.0022911479674189925, 'init_value': -0.3971675932407379, 'ave_value': -0.15605798815670716, 'soft_opc': nan} step=996




2022-04-20 19:56.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.07 [info     ] FQE_20220420195605: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015221877270434276, 'time_algorithm_update': 0.0020273708435426274, 'loss': 0.004007522197698344, 'time_step': 0.0022469701537166735, 'init_value': -0.469784140586853, 'ave_value': -0.19584008769157368, 'soft_opc': nan} step=1162




2022-04-20 19:56.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.08 [info     ] FQE_20220420195605: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015080836882074196, 'time_algorithm_update': 0.0021146320434937992, 'loss': 0.0037114056197544896, 'time_step': 0.0023347010095435454, 'init_value': -0.5082318186759949, 'ave_value': -0.2095346319715719, 'soft_opc': nan} step=1328




2022-04-20 19:56.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.08 [info     ] FQE_20220420195605: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015046941228659757, 'time_algorithm_update': 0.002035501491592591, 'loss': 0.003524802997568061, 'time_step': 0.0022523374442594596, 'init_value': -0.5530929565429688, 'ave_value': -0.24013816985922556, 'soft_opc': nan} step=1494




2022-04-20 19:56.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.09 [info     ] FQE_20220420195605: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015632790255259318, 'time_algorithm_update': 0.0020954838718276426, 'loss': 0.003777397696226162, 'time_step': 0.0023210249751447194, 'init_value': -0.580604612827301, 'ave_value': -0.23623707699086013, 'soft_opc': nan} step=1660




2022-04-20 19:56.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.09 [info     ] FQE_20220420195605: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015226473291236233, 'time_algorithm_update': 0.0020597569913749234, 'loss': 0.003573150150281523, 'time_step': 0.0022782992167645192, 'init_value': -0.6137087345123291, 'ave_value': -0.24108262042103856, 'soft_opc': nan} step=1826




2022-04-20 19:56.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.10 [info     ] FQE_20220420195605: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015534837561917593, 'time_algorithm_update': 0.0021053523902433463, 'loss': 0.003676206313202674, 'time_step': 0.0023327505732157142, 'init_value': -0.6519801020622253, 'ave_value': -0.2575383833483667, 'soft_opc': nan} step=1992




2022-04-20 19:56.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.10 [info     ] FQE_20220420195605: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001521627587008189, 'time_algorithm_update': 0.0021267971360539816, 'loss': 0.0038883164763473064, 'time_step': 0.002347522471324507, 'init_value': -0.6761682033538818, 'ave_value': -0.2504554521460254, 'soft_opc': nan} step=2158




2022-04-20 19:56.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.11 [info     ] FQE_20220420195605: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001481398042426052, 'time_algorithm_update': 0.002031125218035227, 'loss': 0.003996085466716035, 'time_step': 0.0022459432303187357, 'init_value': -0.7749539613723755, 'ave_value': -0.3205302901381442, 'soft_opc': nan} step=2324




2022-04-20 19:56.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.11 [info     ] FQE_20220420195605: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014875308576836643, 'time_algorithm_update': 0.0019941732107874857, 'loss': 0.004204653337435146, 'time_step': 0.002208421029240252, 'init_value': -0.7978368401527405, 'ave_value': -0.30865049691207974, 'soft_opc': nan} step=2490




2022-04-20 19:56.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.11 [info     ] FQE_20220420195605: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015149059065853256, 'time_algorithm_update': 0.002040018518287015, 'loss': 0.00466121386046555, 'time_step': 0.002257885703121323, 'init_value': -0.8696287274360657, 'ave_value': -0.34979557029668007, 'soft_opc': nan} step=2656




2022-04-20 19:56.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.12 [info     ] FQE_20220420195605: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014395598905632295, 'time_algorithm_update': 0.0019355409116630095, 'loss': 0.004877318731573668, 'time_step': 0.0021431503525699475, 'init_value': -0.9448926448822021, 'ave_value': -0.3868659669239712, 'soft_opc': nan} step=2822




2022-04-20 19:56.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.12 [info     ] FQE_20220420195605: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014712437089667264, 'time_algorithm_update': 0.0020043605781463257, 'loss': 0.004926858971972602, 'time_step': 0.0022148525858499916, 'init_value': -0.997536301612854, 'ave_value': -0.4223841442625869, 'soft_opc': nan} step=2988




2022-04-20 19:56.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.13 [info     ] FQE_20220420195605: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015133260244346527, 'time_algorithm_update': 0.002019999975181488, 'loss': 0.005375095739180828, 'time_step': 0.0022399727120456926, 'init_value': -1.0565009117126465, 'ave_value': -0.4463000314075265, 'soft_opc': nan} step=3154




2022-04-20 19:56.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.13 [info     ] FQE_20220420195605: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001508126775902438, 'time_algorithm_update': 0.0020402454468141117, 'loss': 0.0061132892046168626, 'time_step': 0.002257805272757289, 'init_value': -1.180890440940857, 'ave_value': -0.5345073297435233, 'soft_opc': nan} step=3320




2022-04-20 19:56.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.13 [info     ] FQE_20220420195605: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015388339398855186, 'time_algorithm_update': 0.0020724850964833454, 'loss': 0.006500204102169857, 'time_step': 0.002293879727283156, 'init_value': -1.1885281801223755, 'ave_value': -0.5187393359563036, 'soft_opc': nan} step=3486




2022-04-20 19:56.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.14 [info     ] FQE_20220420195605: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015307909034820925, 'time_algorithm_update': 0.002174933272671987, 'loss': 0.007005386539813445, 'time_step': 0.002401360546249941, 'init_value': -1.348721981048584, 'ave_value': -0.6170230012882064, 'soft_opc': nan} step=3652




2022-04-20 19:56.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.14 [info     ] FQE_20220420195605: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001591472740632942, 'time_algorithm_update': 0.0023584811084241754, 'loss': 0.007527823039577399, 'time_step': 0.0025871805397860975, 'init_value': -1.3988285064697266, 'ave_value': -0.6447378586197423, 'soft_opc': nan} step=3818




2022-04-20 19:56.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.15 [info     ] FQE_20220420195605: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015027120888951313, 'time_algorithm_update': 0.002002042460154338, 'loss': 0.007657651140435924, 'time_step': 0.0022199527326836645, 'init_value': -1.4381064176559448, 'ave_value': -0.6664853274264281, 'soft_opc': nan} step=3984




2022-04-20 19:56.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.15 [info     ] FQE_20220420195605: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014734411814126624, 'time_algorithm_update': 0.002047438219369176, 'loss': 0.008259558940098721, 'time_step': 0.0022617506693644695, 'init_value': -1.5405313968658447, 'ave_value': -0.7244250450551778, 'soft_opc': nan} step=4150




2022-04-20 19:56.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.16 [info     ] FQE_20220420195605: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015189992376120695, 'time_algorithm_update': 0.0021004461380372563, 'loss': 0.00873107046621055, 'time_step': 0.0023206185145550466, 'init_value': -1.649731159210205, 'ave_value': -0.7863077555233529, 'soft_opc': nan} step=4316




2022-04-20 19:56.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.16 [info     ] FQE_20220420195605: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014648523675390036, 'time_algorithm_update': 0.001999462943479239, 'loss': 0.009143559316857662, 'time_step': 0.0022127700139241047, 'init_value': -1.7011425495147705, 'ave_value': -0.8225112369565108, 'soft_opc': nan} step=4482




2022-04-20 19:56.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.16 [info     ] FQE_20220420195605: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014534054032291276, 'time_algorithm_update': 0.001991879509156009, 'loss': 0.01004554555226813, 'time_step': 0.0022047054336731694, 'init_value': -1.7495472431182861, 'ave_value': -0.8021152634695927, 'soft_opc': nan} step=4648




2022-04-20 19:56.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.17 [info     ] FQE_20220420195605: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015263241457651896, 'time_algorithm_update': 0.0020851313349712327, 'loss': 0.010462915350217372, 'time_step': 0.0023046487785247437, 'init_value': -1.8588188886642456, 'ave_value': -0.8821235069025493, 'soft_opc': nan} step=4814




2022-04-20 19:56.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.17 [info     ] FQE_20220420195605: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001529641898281603, 'time_algorithm_update': 0.0020417204822402403, 'loss': 0.010961219062117007, 'time_step': 0.0022629255271819702, 'init_value': -1.8952667713165283, 'ave_value': -0.9261827577572518, 'soft_opc': nan} step=4980




2022-04-20 19:56.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.18 [info     ] FQE_20220420195605: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015253618539097798, 'time_algorithm_update': 0.00205871570541198, 'loss': 0.011150674501725709, 'time_step': 0.0022770798349954994, 'init_value': -1.9751591682434082, 'ave_value': -0.9364370839309518, 'soft_opc': nan} step=5146




2022-04-20 19:56.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.18 [info     ] FQE_20220420195605: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014691898621708513, 'time_algorithm_update': 0.0019828253481761517, 'loss': 0.011619780074134582, 'time_step': 0.0021970702941159167, 'init_value': -2.0559964179992676, 'ave_value': -0.9865686278186134, 'soft_opc': nan} step=5312




2022-04-20 19:56.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.19 [info     ] FQE_20220420195605: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001465398145009236, 'time_algorithm_update': 0.0019778602094535367, 'loss': 0.013109227308599525, 'time_step': 0.0021915134177150496, 'init_value': -2.1024885177612305, 'ave_value': -0.996705337228287, 'soft_opc': nan} step=5478




2022-04-20 19:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.19 [info     ] FQE_20220420195605: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015309632542621657, 'time_algorithm_update': 0.001972783042723874, 'loss': 0.013938556477870703, 'time_step': 0.002193340335983828, 'init_value': -2.194416046142578, 'ave_value': -1.0634321145855965, 'soft_opc': nan} step=5644




2022-04-20 19:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.19 [info     ] FQE_20220420195605: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014865972909582667, 'time_algorithm_update': 0.0019977336906525024, 'loss': 0.014251606643034414, 'time_step': 0.0022096992975257964, 'init_value': -2.2694427967071533, 'ave_value': -1.1085893983440893, 'soft_opc': nan} step=5810




2022-04-20 19:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.20 [info     ] FQE_20220420195605: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001525634742644896, 'time_algorithm_update': 0.0021262786474572607, 'loss': 0.014833521888961634, 'time_step': 0.00234708297683532, 'init_value': -2.2767534255981445, 'ave_value': -1.0785810033075494, 'soft_opc': nan} step=5976




2022-04-20 19:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.20 [info     ] FQE_20220420195605: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014730390295924912, 'time_algorithm_update': 0.0020466296069593316, 'loss': 0.015105061217615134, 'time_step': 0.002260206693626312, 'init_value': -2.378065824508667, 'ave_value': -1.1514888254852194, 'soft_opc': nan} step=6142




2022-04-20 19:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.21 [info     ] FQE_20220420195605: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015185252729668674, 'time_algorithm_update': 0.0020858049392700195, 'loss': 0.015891973324069553, 'time_step': 0.0023052663688200065, 'init_value': -2.445024013519287, 'ave_value': -1.213675693534986, 'soft_opc': nan} step=6308




2022-04-20 19:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.21 [info     ] FQE_20220420195605: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015418213534067912, 'time_algorithm_update': 0.0020359510398772827, 'loss': 0.01636631657504084, 'time_step': 0.0022544530500848608, 'init_value': -2.48240327835083, 'ave_value': -1.2089281990551515, 'soft_opc': nan} step=6474




2022-04-20 19:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.21 [info     ] FQE_20220420195605: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014930029949509953, 'time_algorithm_update': 0.002048389021172581, 'loss': 0.017076095475686198, 'time_step': 0.0022653556731810054, 'init_value': -2.5422940254211426, 'ave_value': -1.23245137190312, 'soft_opc': nan} step=6640




2022-04-20 19:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.22 [info     ] FQE_20220420195605: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015192434012171733, 'time_algorithm_update': 0.0021235741764666087, 'loss': 0.01803327170382983, 'time_step': 0.002345567726227174, 'init_value': -2.698787212371826, 'ave_value': -1.3644128048805066, 'soft_opc': nan} step=6806




2022-04-20 19:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.22 [info     ] FQE_20220420195605: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015115594289389, 'time_algorithm_update': 0.0020636205213615693, 'loss': 0.01829611966512671, 'time_step': 0.002283767045262348, 'init_value': -2.7099850177764893, 'ave_value': -1.3517522954130294, 'soft_opc': nan} step=6972




2022-04-20 19:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.23 [info     ] FQE_20220420195605: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001537167882344809, 'time_algorithm_update': 0.002046834991638919, 'loss': 0.019621477058404183, 'time_step': 0.0022661642855908498, 'init_value': -2.7218096256256104, 'ave_value': -1.341329786223821, 'soft_opc': nan} step=7138




2022-04-20 19:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.23 [info     ] FQE_20220420195605: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014848306954625142, 'time_algorithm_update': 0.002031116600496223, 'loss': 0.020058467244331062, 'time_step': 0.0022480272385011235, 'init_value': -2.8234238624572754, 'ave_value': -1.4355279777089955, 'soft_opc': nan} step=7304




2022-04-20 19:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.24 [info     ] FQE_20220420195605: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015284928930811136, 'time_algorithm_update': 0.002023583435150514, 'loss': 0.02086249609892986, 'time_step': 0.0022428150636604033, 'init_value': -2.816936492919922, 'ave_value': -1.387286214700791, 'soft_opc': nan} step=7470




2022-04-20 19:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.24 [info     ] FQE_20220420195605: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001524184123579278, 'time_algorithm_update': 0.0020934113536972597, 'loss': 0.02151567663110972, 'time_step': 0.0023134688296950006, 'init_value': -2.9633889198303223, 'ave_value': -1.5168111068830061, 'soft_opc': nan} step=7636




2022-04-20 19:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.24 [info     ] FQE_20220420195605: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015034876674054618, 'time_algorithm_update': 0.0020015713680221372, 'loss': 0.022641254641558885, 'time_step': 0.0022160834576710163, 'init_value': -3.0606164932250977, 'ave_value': -1.5864680071344581, 'soft_opc': nan} step=7802




2022-04-20 19:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.25 [info     ] FQE_20220420195605: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001445577805300793, 'time_algorithm_update': 0.0019285563963005342, 'loss': 0.022556164517230075, 'time_step': 0.0021399632993950903, 'init_value': -3.2064459323883057, 'ave_value': -1.6960156108407924, 'soft_opc': nan} step=7968




2022-04-20 19:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.25 [info     ] FQE_20220420195605: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001556126468152885, 'time_algorithm_update': 0.00204401992889772, 'loss': 0.021799052026156472, 'time_step': 0.0022654461573405438, 'init_value': -3.227735996246338, 'ave_value': -1.6873047459211938, 'soft_opc': nan} step=8134




2022-04-20 19:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 19:56.26 [info     ] FQE_20220420195605: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001528076378695936, 'time_algorithm_update': 0.0020946752594177983, 'loss': 0.024901114002358258, 'time_step': 0.0023182759802025483, 'init_value': -3.3060507774353027, 'ave_value': -1.788594883322917, 'soft_opc': nan} step=8300




2022-04-20 19:56.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195605/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 19:56.26 [info     ] Directory is created at d3rlpy_logs/FQE_20220420195626
2022-04-20 19:56.26 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:56.26 [debug    ] Building models...
2022-04-20 19:56.26 [debug    ] Models have been built.
2022-04-20 19:56.26 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420195626/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 19:56.27 [info     ] FQE_20220420195626: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00015170943569129623, 'time_algorithm_update': 0.0021776300081065, 'loss': 0.022401029204713627, 'time_step': 0.0023986111224537166, 'init_value': -1.4163247346878052, 'ave_value': -1.4144102594260237, 'soft_opc': nan} step=355




2022-04-20 19:56.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.28 [info     ] FQE_20220420195626: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00014857305607325594, 'time_algorithm_update': 0.002054492520614409, 'loss': 0.022073336467671564, 'time_step': 0.002269599135492889, 'init_value': -2.6414966583251953, 'ave_value': -2.6611897388173498, 'soft_opc': nan} step=710




2022-04-20 19:56.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.29 [info     ] FQE_20220420195626: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00014933599552638096, 'time_algorithm_update': 0.00201607690730565, 'loss': 0.025778597370314765, 'time_step': 0.0022329310296286997, 'init_value': -3.2838635444641113, 'ave_value': -3.2575358682464297, 'soft_opc': nan} step=1065




2022-04-20 19:56.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.29 [info     ] FQE_20220420195626: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00015119028763032296, 'time_algorithm_update': 0.0020109143055660624, 'loss': 0.03165758988544555, 'time_step': 0.002230593184350242, 'init_value': -4.428343296051025, 'ave_value': -4.357778263092041, 'soft_opc': nan} step=1420




2022-04-20 19:56.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.30 [info     ] FQE_20220420195626: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001511439471177652, 'time_algorithm_update': 0.0020459799699380364, 'loss': 0.03854739020355571, 'time_step': 0.0022660450196601975, 'init_value': -5.083241939544678, 'ave_value': -4.976926954802092, 'soft_opc': nan} step=1775




2022-04-20 19:56.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.31 [info     ] FQE_20220420195626: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.0001510552957024373, 'time_algorithm_update': 0.002027082443237305, 'loss': 0.04749332556999485, 'time_step': 0.0022498157662405095, 'init_value': -5.9754133224487305, 'ave_value': -5.809068127856752, 'soft_opc': nan} step=2130




2022-04-20 19:56.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.32 [info     ] FQE_20220420195626: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00015001162676744057, 'time_algorithm_update': 0.0020392471635845345, 'loss': 0.05609003804151861, 'time_step': 0.002260826674985214, 'init_value': -6.638028621673584, 'ave_value': -6.472576690794112, 'soft_opc': nan} step=2485




2022-04-20 19:56.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.33 [info     ] FQE_20220420195626: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00015352947611204336, 'time_algorithm_update': 0.0020685128762688437, 'loss': 0.06679737035604849, 'time_step': 0.0022918311642928864, 'init_value': -7.420497894287109, 'ave_value': -7.2750199979489985, 'soft_opc': nan} step=2840




2022-04-20 19:56.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.34 [info     ] FQE_20220420195626: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001505818165523905, 'time_algorithm_update': 0.002026577398810588, 'loss': 0.07688128220205995, 'time_step': 0.0022464235064009546, 'init_value': -7.988958835601807, 'ave_value': -7.894878164374844, 'soft_opc': nan} step=3195




2022-04-20 19:56.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.35 [info     ] FQE_20220420195626: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00015229171430560906, 'time_algorithm_update': 0.0021128358975262707, 'loss': 0.08923936600626355, 'time_step': 0.0023346007709771814, 'init_value': -8.517870903015137, 'ave_value': -8.50109387386106, 'soft_opc': nan} step=3550




2022-04-20 19:56.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.36 [info     ] FQE_20220420195626: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00015053480443820148, 'time_algorithm_update': 0.0020103817254724637, 'loss': 0.09997167918096546, 'time_step': 0.0022301546284850217, 'init_value': -9.482893943786621, 'ave_value': -9.561255217212201, 'soft_opc': nan} step=3905




2022-04-20 19:56.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.36 [info     ] FQE_20220420195626: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00015239245455029985, 'time_algorithm_update': 0.002010589921978158, 'loss': 0.11606621618684329, 'time_step': 0.0022306187052122304, 'init_value': -9.823014259338379, 'ave_value': -9.982347187695202, 'soft_opc': nan} step=4260




2022-04-20 19:56.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.37 [info     ] FQE_20220420195626: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00015126080580160652, 'time_algorithm_update': 0.0020823814499546104, 'loss': 0.13150099927270917, 'time_step': 0.00230094076881946, 'init_value': -10.413859367370605, 'ave_value': -10.717171266548421, 'soft_opc': nan} step=4615




2022-04-20 19:56.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.38 [info     ] FQE_20220420195626: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001532191961583957, 'time_algorithm_update': 0.002020579324641698, 'loss': 0.1443735572102834, 'time_step': 0.002243275037953551, 'init_value': -10.691618919372559, 'ave_value': -11.146440529792619, 'soft_opc': nan} step=4970




2022-04-20 19:56.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.39 [info     ] FQE_20220420195626: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001537249121867435, 'time_algorithm_update': 0.002147859922597106, 'loss': 0.15995462805190136, 'time_step': 0.0023715596803477114, 'init_value': -11.258722305297852, 'ave_value': -11.95853829144511, 'soft_opc': nan} step=5325




2022-04-20 19:56.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.40 [info     ] FQE_20220420195626: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.0001508746348636251, 'time_algorithm_update': 0.001998810029365647, 'loss': 0.1785248404001476, 'time_step': 0.002218492266157983, 'init_value': -11.526693344116211, 'ave_value': -12.381493311897975, 'soft_opc': nan} step=5680




2022-04-20 19:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.41 [info     ] FQE_20220420195626: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00015103313284860531, 'time_algorithm_update': 0.0020222952668095978, 'loss': 0.18989076505506963, 'time_step': 0.00224124680102711, 'init_value': -11.739031791687012, 'ave_value': -12.883673234642549, 'soft_opc': nan} step=6035




2022-04-20 19:56.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.42 [info     ] FQE_20220420195626: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001502198232731349, 'time_algorithm_update': 0.0020380543990873956, 'loss': 0.1979634110278017, 'time_step': 0.0022557117569614463, 'init_value': -11.908585548400879, 'ave_value': -13.207408741971854, 'soft_opc': nan} step=6390




2022-04-20 19:56.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.43 [info     ] FQE_20220420195626: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00014917212472835057, 'time_algorithm_update': 0.0020387649536132814, 'loss': 0.21143605292473042, 'time_step': 0.002256965637207031, 'init_value': -12.182961463928223, 'ave_value': -13.823487331262733, 'soft_opc': nan} step=6745




2022-04-20 19:56.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.43 [info     ] FQE_20220420195626: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.0001543703213543959, 'time_algorithm_update': 0.0020897368310203016, 'loss': 0.22405508869183316, 'time_step': 0.002313239137891313, 'init_value': -12.463888168334961, 'ave_value': -14.212406848541711, 'soft_opc': nan} step=7100




2022-04-20 19:56.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.44 [info     ] FQE_20220420195626: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00015143743703063107, 'time_algorithm_update': 0.0020647727267842897, 'loss': 0.23820145614707555, 'time_step': 0.002286218589460346, 'init_value': -12.716570854187012, 'ave_value': -14.668469853750988, 'soft_opc': nan} step=7455




2022-04-20 19:56.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.45 [info     ] FQE_20220420195626: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.0001519478542703978, 'time_algorithm_update': 0.002076034142937459, 'loss': 0.25372368945202356, 'time_step': 0.002299629802435217, 'init_value': -13.104494094848633, 'ave_value': -15.325880585091149, 'soft_opc': nan} step=7810




2022-04-20 19:56.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.46 [info     ] FQE_20220420195626: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00015159862142213635, 'time_algorithm_update': 0.0020639050174766863, 'loss': 0.26662280606237093, 'time_step': 0.0022858518949696715, 'init_value': -13.513463020324707, 'ave_value': -15.929240724049327, 'soft_opc': nan} step=8165




2022-04-20 19:56.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.47 [info     ] FQE_20220420195626: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00014907474249181612, 'time_algorithm_update': 0.002009145978470923, 'loss': 0.2765529349105249, 'time_step': 0.002226646853164888, 'init_value': -13.699934005737305, 'ave_value': -16.315385913572715, 'soft_opc': nan} step=8520




2022-04-20 19:56.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.48 [info     ] FQE_20220420195626: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00015280078834211322, 'time_algorithm_update': 0.0020366863465644943, 'loss': 0.28872387324210624, 'time_step': 0.002257518365349568, 'init_value': -14.023401260375977, 'ave_value': -16.79547795092584, 'soft_opc': nan} step=8875




2022-04-20 19:56.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.49 [info     ] FQE_20220420195626: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.0001547255986173388, 'time_algorithm_update': 0.0020659802665173167, 'loss': 0.30458916204286296, 'time_step': 0.0022891387133531166, 'init_value': -14.238442420959473, 'ave_value': -17.28783658336335, 'soft_opc': nan} step=9230




2022-04-20 19:56.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.50 [info     ] FQE_20220420195626: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00015464232001506108, 'time_algorithm_update': 0.0021551575459225077, 'loss': 0.31120700546553437, 'time_step': 0.0023794154046287, 'init_value': -14.118539810180664, 'ave_value': -17.473158953180643, 'soft_opc': nan} step=9585




2022-04-20 19:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.50 [info     ] FQE_20220420195626: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00015354156494140625, 'time_algorithm_update': 0.002061988266421036, 'loss': 0.3167708831511333, 'time_step': 0.0022836000147000166, 'init_value': -14.173646926879883, 'ave_value': -17.812315048336828, 'soft_opc': nan} step=9940




2022-04-20 19:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.52 [info     ] FQE_20220420195626: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00016814621401504731, 'time_algorithm_update': 0.002476615637121066, 'loss': 0.3250714897584747, 'time_step': 0.0027251317467488034, 'init_value': -14.22829532623291, 'ave_value': -18.04611815586974, 'soft_opc': nan} step=10295




2022-04-20 19:56.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.53 [info     ] FQE_20220420195626: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017114693010357064, 'time_algorithm_update': 0.0025371994770748514, 'loss': 0.33537032591362653, 'time_step': 0.0027908237887100435, 'init_value': -14.670129776000977, 'ave_value': -18.678758861107255, 'soft_opc': nan} step=10650




2022-04-20 19:56.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.54 [info     ] FQE_20220420195626: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.0001611964803346446, 'time_algorithm_update': 0.002311652814838248, 'loss': 0.347815501099636, 'time_step': 0.002545380256545376, 'init_value': -14.814126968383789, 'ave_value': -19.005177439409792, 'soft_opc': nan} step=11005




2022-04-20 19:56.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.55 [info     ] FQE_20220420195626: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016584127721652178, 'time_algorithm_update': 0.002418633581886829, 'loss': 0.3520653974999424, 'time_step': 0.002662802414155342, 'init_value': -14.948076248168945, 'ave_value': -19.44016114005404, 'soft_opc': nan} step=11360




2022-04-20 19:56.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.56 [info     ] FQE_20220420195626: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00016394601741307217, 'time_algorithm_update': 0.0023809506859577877, 'loss': 0.3576378859579563, 'time_step': 0.002622750779272805, 'init_value': -15.023161888122559, 'ave_value': -19.94510050553306, 'soft_opc': nan} step=11715




2022-04-20 19:56.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.57 [info     ] FQE_20220420195626: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00016126901331082197, 'time_algorithm_update': 0.0023472866541902784, 'loss': 0.3727375251227911, 'time_step': 0.0025832384405001787, 'init_value': -15.174665451049805, 'ave_value': -20.20808414222344, 'soft_opc': nan} step=12070




2022-04-20 19:56.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.58 [info     ] FQE_20220420195626: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016627446026869222, 'time_algorithm_update': 0.002412920938411229, 'loss': 0.3778232265867188, 'time_step': 0.0026554060653901437, 'init_value': -15.434332847595215, 'ave_value': -20.657248557948698, 'soft_opc': nan} step=12425




2022-04-20 19:56.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:56.59 [info     ] FQE_20220420195626: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001615463847845373, 'time_algorithm_update': 0.002382827812517193, 'loss': 0.38601788766944495, 'time_step': 0.002621007301438023, 'init_value': -15.484369277954102, 'ave_value': -20.913475648439377, 'soft_opc': nan} step=12780




2022-04-20 19:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.00 [info     ] FQE_20220420195626: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00016355313045877807, 'time_algorithm_update': 0.00244872939418739, 'loss': 0.39685223340253595, 'time_step': 0.002687599289585167, 'init_value': -15.472807884216309, 'ave_value': -21.148561029191796, 'soft_opc': nan} step=13135




2022-04-20 19:57.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.01 [info     ] FQE_20220420195626: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001605893524599747, 'time_algorithm_update': 0.002340520267755213, 'loss': 0.41106014925671713, 'time_step': 0.002576732635498047, 'init_value': -15.512656211853027, 'ave_value': -21.38470090687505, 'soft_opc': nan} step=13490




2022-04-20 19:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.02 [info     ] FQE_20220420195626: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00015939793116609815, 'time_algorithm_update': 0.0023063820852360253, 'loss': 0.41782949838367567, 'time_step': 0.00253902556191028, 'init_value': -15.450693130493164, 'ave_value': -21.664337405182682, 'soft_opc': nan} step=13845




2022-04-20 19:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.03 [info     ] FQE_20220420195626: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016692188424123844, 'time_algorithm_update': 0.002410686519783987, 'loss': 0.42583757495953584, 'time_step': 0.002657201928152165, 'init_value': -15.669937133789062, 'ave_value': -22.033037170050534, 'soft_opc': nan} step=14200




2022-04-20 19:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.04 [info     ] FQE_20220420195626: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016812069315305897, 'time_algorithm_update': 0.002459374280043051, 'loss': 0.4408741093935891, 'time_step': 0.0027079374017849774, 'init_value': -15.898205757141113, 'ave_value': -22.367342669653922, 'soft_opc': nan} step=14555




2022-04-20 19:57.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.05 [info     ] FQE_20220420195626: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016271900123273822, 'time_algorithm_update': 0.002301144264113735, 'loss': 0.44882909813502303, 'time_step': 0.002539541351963097, 'init_value': -15.778481483459473, 'ave_value': -22.525014309195786, 'soft_opc': nan} step=14910




2022-04-20 19:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.06 [info     ] FQE_20220420195626: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.000162948017389002, 'time_algorithm_update': 0.0023067494513283313, 'loss': 0.46161944647122855, 'time_step': 0.0025486684181320837, 'init_value': -15.866157531738281, 'ave_value': -22.866286629108544, 'soft_opc': nan} step=15265




2022-04-20 19:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.06 [info     ] FQE_20220420195626: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001556107695673553, 'time_algorithm_update': 0.0021968028914760534, 'loss': 0.47379007891974817, 'time_step': 0.0024233435241269394, 'init_value': -16.00762367248535, 'ave_value': -23.274397149356023, 'soft_opc': nan} step=15620




2022-04-20 19:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.08 [info     ] FQE_20220420195626: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016962508080710828, 'time_algorithm_update': 0.0024645758346772528, 'loss': 0.4868550802043206, 'time_step': 0.002714783708814164, 'init_value': -16.189542770385742, 'ave_value': -23.60196955319077, 'soft_opc': nan} step=15975




2022-04-20 19:57.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.09 [info     ] FQE_20220420195626: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00016600783442107724, 'time_algorithm_update': 0.0024043586892141425, 'loss': 0.5037407886840298, 'time_step': 0.0026452870436117683, 'init_value': -16.086688995361328, 'ave_value': -23.617242412769656, 'soft_opc': nan} step=16330




2022-04-20 19:57.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.09 [info     ] FQE_20220420195626: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00015821994190484706, 'time_algorithm_update': 0.002254387358544578, 'loss': 0.5240034877886655, 'time_step': 0.0024844606157759545, 'init_value': -16.39992332458496, 'ave_value': -24.246822386857318, 'soft_opc': nan} step=16685




2022-04-20 19:57.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.10 [info     ] FQE_20220420195626: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016041607923910652, 'time_algorithm_update': 0.002286694755016918, 'loss': 0.5377821964015004, 'time_step': 0.0025207089706205986, 'init_value': -16.68680191040039, 'ave_value': -24.497496805334297, 'soft_opc': nan} step=17040




2022-04-20 19:57.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.11 [info     ] FQE_20220420195626: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016711664871430732, 'time_algorithm_update': 0.002407982651616486, 'loss': 0.5501093194911807, 'time_step': 0.0026520480572337834, 'init_value': -16.592472076416016, 'ave_value': -24.47380697736984, 'soft_opc': nan} step=17395




2022-04-20 19:57.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 19:57.12 [info     ] FQE_20220420195626: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00015983984503947513, 'time_algorithm_update': 0.002255192608900473, 'loss': 0.5535935154630685, 'time_step': 0.0024891947356747907, 'init_value': -16.381044387817383, 'ave_value': -24.322726694475307, 'soft_opc': nan} step=17750




2022-04-20 19:57.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420195626/model_17750.pt
search iteration:  32
using hyper params:  [0.003450552729230812, 0.003484417075142001, 7.19681761181051e-05, 5]
2022-04-20 19:57.12 [debug    ] RoundIterator is selected.
2022-04-20 19:57.12 [info     ] Directory is created at d3rlpy_logs/CQL_20220420195712
2022-04-20 19:57.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 19:57.12 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 19:57.12 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420195712/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.003450552729230812, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'wei

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.20 [info     ] CQL_20220420195712: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00036250778109009503, 'time_algorithm_update': 0.01985852132763779, 'temp_loss': 4.564137422550491, 'temp': 0.987985814001128, 'alpha_loss': -17.201099992495532, 'alpha': 1.017261256948549, 'critic_loss': 47.53286260192157, 'actor_loss': 3.1856421677515523, 'time_step': 0.02032406148854752, 'td_error': 3.997183889251927, 'init_value': -7.3857879638671875, 'ave_value': -5.211154651457595} step=342
2022-04-20 19:57.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.27 [info     ] CQL_20220420195712: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003632334937826235, 'time_algorithm_update': 0.019222235819052535, 'temp_loss': 4.252000322118837, 'temp': 0.9646277488672246, 'alpha_loss': -9.670279006511844, 'alpha': 1.0452635002415083, 'critic_loss': 27.444008609704802, 'actor_loss': 8.051302236423158, 'time_step': 0.019690982082433868, 'td_error': 4.893155270431742, 'init_value': -16.9443416595459, 'ave_value': -10.55930076606018} step=684
2022-04-20 19:57.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.33 [info     ] CQL_20220420195712: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035268103170116045, 'time_algorithm_update': 0.018848734989500883, 'temp_loss': 3.5610310940714607, 'temp': 0.9435395570526346, 'alpha_loss': -5.637296479347854, 'alpha': 1.06495197614034, 'critic_loss': 34.44254686679059, 'actor_loss': 13.728477768033569, 'time_step': 0.01930235561571623, 'td_error': 6.082679940869235, 'init_value': -24.490846633911133, 'ave_value': -14.422399023322573} step=1026
2022-04-20 19:57.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.40 [info     ] CQL_20220420195712: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003582734113548234, 'time_algorithm_update': 0.018442271048562567, 'temp_loss': 3.0566715985013726, 'temp': 0.9246304748002548, 'alpha_loss': -3.2850560824773467, 'alpha': 1.0803477771100942, 'critic_loss': 46.75944271422269, 'actor_loss': 19.309203549435264, 'time_step': 0.01889962689918384, 'td_error': 8.365756592490998, 'init_value': -32.54244613647461, 'ave_value': -19.139905802067872} step=1368
2022-04-20 19:57.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.47 [info     ] CQL_20220420195712: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003495404594822934, 'time_algorithm_update': 0.018131080426667865, 'temp_loss': 2.6198941664389004, 'temp': 0.9071947994985079, 'alpha_loss': -1.2057249048123633, 'alpha': 1.0899746079891048, 'critic_loss': 63.671230974253156, 'actor_loss': 24.45163871809753, 'time_step': 0.018578725948668363, 'td_error': 11.565411060007083, 'init_value': -40.5036735534668, 'ave_value': -23.5120673931578} step=1710
2022-04-20 19:57.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:57.53 [info     ] CQL_20220420195712: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003502815090424833, 'time_algorithm_update': 0.018058092273466768, 'temp_loss': 2.26523172437099, 'temp': 0.8910112971799415, 'alpha_loss': 0.47481505722569967, 'alpha': 1.0916595106933549, 'critic_loss': 83.3201964640478, 'actor_loss': 29.268801677993864, 'time_step': 0.01850844893539161, 'td_error': 15.32383508734935, 'init_value': -47.144805908203125, 'ave_value': -26.939053366167453} step=2052
2022-04-20 19:57.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.00 [info     ] CQL_20220420195712: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00034763241371913264, 'time_algorithm_update': 0.01800079722153513, 'temp_loss': 1.9744134831149676, 'temp': 0.8757673029662573, 'alpha_loss': 2.0021450543035453, 'alpha': 1.0834240069863392, 'critic_loss': 102.41040873388101, 'actor_loss': 33.779823537458455, 'time_step': 0.01845042538224605, 'td_error': 18.623814691847322, 'init_value': -54.16588592529297, 'ave_value': -31.218360772287255} step=2394
2022-04-20 19:58.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.06 [info     ] CQL_20220420195712: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00034663830584252787, 'time_algorithm_update': 0.01799088472511336, 'temp_loss': 1.70398731956705, 'temp': 0.8613572615629052, 'alpha_loss': 3.3265463174783694, 'alpha': 1.0634061223582219, 'critic_loss': 122.33373307345207, 'actor_loss': 38.01448769039578, 'time_step': 0.018435177050138776, 'td_error': 21.899161424877875, 'init_value': -60.17280197143555, 'ave_value': -34.547317153944356} step=2736
2022-04-20 19:58.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.13 [info     ] CQL_20220420195712: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003495167570504529, 'time_algorithm_update': 0.017690170578092163, 'temp_loss': 1.4433017526453698, 'temp': 0.8479091317681541, 'alpha_loss': 4.419570144156964, 'alpha': 1.0325870371004293, 'critic_loss': 142.28441943341528, 'actor_loss': 41.88021073146173, 'time_step': 0.018135638264884724, 'td_error': 25.332632074159058, 'init_value': -66.4974365234375, 'ave_value': -37.87910144463853} step=3078
2022-04-20 19:58.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.19 [info     ] CQL_20220420195712: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035328614084344164, 'time_algorithm_update': 0.017594978126168948, 'temp_loss': 1.2238759551829064, 'temp': 0.83533319395188, 'alpha_loss': 5.361273769746747, 'alpha': 0.9955519397704922, 'critic_loss': 160.1203544572083, 'actor_loss': 45.58289109614857, 'time_step': 0.01804454981932166, 'td_error': 29.504302141600412, 'init_value': -70.80278015136719, 'ave_value': -41.19058433306781} step=3420
2022-04-20 19:58.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.26 [info     ] CQL_20220420195712: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00034743303443953306, 'time_algorithm_update': 0.017638330571135584, 'temp_loss': 1.025560376849788, 'temp': 0.8236848215262095, 'alpha_loss': 6.274662282034668, 'alpha': 0.9555512929868977, 'critic_loss': 179.97309911181355, 'actor_loss': 49.202136859559175, 'time_step': 0.018086624424359952, 'td_error': 32.009359084358735, 'init_value': -76.34761047363281, 'ave_value': -44.50997244132867} step=3762
2022-04-20 19:58.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.32 [info     ] CQL_20220420195712: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00035049971084148566, 'time_algorithm_update': 0.017391208319636118, 'temp_loss': 0.8796808829433039, 'temp': 0.8127075099108512, 'alpha_loss': 6.784965058516341, 'alpha': 0.9163445581121055, 'critic_loss': 200.55405484026636, 'actor_loss': 52.607936870284945, 'time_step': 0.017836459198890375, 'td_error': 36.67671743838124, 'init_value': -81.68263244628906, 'ave_value': -47.336811574144406} step=4104
2022-04-20 19:58.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.38 [info     ] CQL_20220420195712: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034724271785446076, 'time_algorithm_update': 0.01743591110608731, 'temp_loss': 0.7534618056569881, 'temp': 0.8021423306032928, 'alpha_loss': 7.217190568210088, 'alpha': 0.8796248859480807, 'critic_loss': 221.61894078840288, 'actor_loss': 55.7378201066402, 'time_step': 0.017881413649397288, 'td_error': 42.75822960689554, 'init_value': -86.63230895996094, 'ave_value': -50.35681579690259} step=4446
2022-04-20 19:58.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.45 [info     ] CQL_20220420195712: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003464668117768583, 'time_algorithm_update': 0.017465128536112824, 'temp_loss': 0.576242298955283, 'temp': 0.7925313967361785, 'alpha_loss': 7.8312027956310075, 'alpha': 0.8446715165648544, 'critic_loss': 240.8021142970749, 'actor_loss': 58.75651256940518, 'time_step': 0.017911687929030747, 'td_error': 46.14504890734695, 'init_value': -90.020751953125, 'ave_value': -52.5316949278331} step=4788
2022-04-20 19:58.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.51 [info     ] CQL_20220420195712: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003498088546663697, 'time_algorithm_update': 0.01748358785060414, 'temp_loss': 0.5094927067035123, 'temp': 0.7837784025404189, 'alpha_loss': 7.945366731164051, 'alpha': 0.811626524604552, 'critic_loss': 262.65193774128517, 'actor_loss': 61.55049566079301, 'time_step': 0.017930078227617587, 'td_error': 46.82773867102167, 'init_value': -93.29237365722656, 'ave_value': -53.90673317275069} step=5130
2022-04-20 19:58.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:58.58 [info     ] CQL_20220420195712: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034691924937287267, 'time_algorithm_update': 0.017868354306583515, 'temp_loss': 0.35493407481246525, 'temp': 0.7758470730126252, 'alpha_loss': 8.338805756373713, 'alpha': 0.7809984614277443, 'critic_loss': 282.96766934757346, 'actor_loss': 64.2461627937897, 'time_step': 0.018314505181117366, 'td_error': 48.14906680651613, 'init_value': -99.63529968261719, 'ave_value': -57.5493229514441} step=5472
2022-04-20 19:58.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.04 [info     ] CQL_20220420195712: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00034781227334898117, 'time_algorithm_update': 0.017575323233130383, 'temp_loss': 0.26016052588493677, 'temp': 0.7691819254766431, 'alpha_loss': 8.566395634099058, 'alpha': 0.7521991654794816, 'critic_loss': 302.9862726668865, 'actor_loss': 66.87002154121622, 'time_step': 0.01802254977979158, 'td_error': 53.19752218667105, 'init_value': -101.70579528808594, 'ave_value': -58.47077035662693} step=5814
2022-04-20 19:59.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.10 [info     ] CQL_20220420195712: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003462165419818365, 'time_algorithm_update': 0.01764302086411861, 'temp_loss': 0.12242299861312785, 'temp': 0.7647945652579704, 'alpha_loss': 8.970979252056768, 'alpha': 0.724132803274177, 'critic_loss': 322.80661528291756, 'actor_loss': 69.18626369230928, 'time_step': 0.0180842151418764, 'td_error': 57.93138516990172, 'init_value': -108.08792877197266, 'ave_value': -61.77841743285726} step=6156
2022-04-20 19:59.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.17 [info     ] CQL_20220420195712: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035189327440763777, 'time_algorithm_update': 0.017696704780846312, 'temp_loss': 0.06959708040438549, 'temp': 0.7623058193259769, 'alpha_loss': 8.96012286275451, 'alpha': 0.6974279549386766, 'critic_loss': 345.3375231648049, 'actor_loss': 71.57404850519191, 'time_step': 0.018148715035957202, 'td_error': 63.58752381741975, 'init_value': -112.03946685791016, 'ave_value': -64.22784362905197} step=6498
2022-04-20 19:59.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.23 [info     ] CQL_20220420195712: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003526092272752907, 'time_algorithm_update': 0.017711332666943646, 'temp_loss': 0.021179894584487057, 'temp': 0.7606728971004486, 'alpha_loss': 8.764127453865363, 'alpha': 0.6732260895054243, 'critic_loss': 366.04647398831554, 'actor_loss': 73.65749582212571, 'time_step': 0.0181637165839212, 'td_error': 64.08052193947935, 'init_value': -114.61397552490234, 'ave_value': -64.88898914158612} step=6840
2022-04-20 19:59.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.30 [info     ] CQL_20220420195712: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003474957761708756, 'time_algorithm_update': 0.017609695244950856, 'temp_loss': -0.034169043637594286, 'temp': 0.7613629695267705, 'alpha_loss': 8.704450869420816, 'alpha': 0.6497030259921537, 'critic_loss': 387.4133570263957, 'actor_loss': 75.67050135205363, 'time_step': 0.01805554565630461, 'td_error': 57.98913509975928, 'init_value': -119.7475814819336, 'ave_value': -67.69034214907133} step=7182
2022-04-20 19:59.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.36 [info     ] CQL_20220420195712: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003516346390484369, 'time_algorithm_update': 0.01769793661017167, 'temp_loss': -0.07637694210075495, 'temp': 0.7635038890685254, 'alpha_loss': 8.562389591283965, 'alpha': 0.6283201909901803, 'critic_loss': 402.72978103369996, 'actor_loss': 77.23744105735021, 'time_step': 0.018147478326719407, 'td_error': 63.09812361941203, 'init_value': -119.4392318725586, 'ave_value': -68.95373148619592} step=7524
2022-04-20 19:59.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.43 [info     ] CQL_20220420195712: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035459256311606244, 'time_algorithm_update': 0.01776261984953406, 'temp_loss': -0.08189223755739237, 'temp': 0.7670551972431049, 'alpha_loss': 8.266890417065536, 'alpha': 0.6077450177474328, 'critic_loss': 418.8177069056104, 'actor_loss': 78.9446911728173, 'time_step': 0.018217119557118557, 'td_error': 61.80405218800372, 'init_value': -123.55598449707031, 'ave_value': -70.72685509201777} step=7866
2022-04-20 19:59.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.49 [info     ] CQL_20220420195712: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003572786063478704, 'time_algorithm_update': 0.017545649879857114, 'temp_loss': -0.10329078373700729, 'temp': 0.7719032616294615, 'alpha_loss': 7.95364398984184, 'alpha': 0.5883232576456684, 'critic_loss': 432.0633344148335, 'actor_loss': 80.18491726033172, 'time_step': 0.01800150341457791, 'td_error': 63.954993264481196, 'init_value': -123.4239501953125, 'ave_value': -70.20359036029862} step=8208
2022-04-20 19:59.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 19:59.56 [info     ] CQL_20220420195712: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00034950839148627385, 'time_algorithm_update': 0.017843276436565914, 'temp_loss': -0.08737099537222881, 'temp': 0.7772445378944888, 'alpha_loss': 7.753565088350173, 'alpha': 0.5698493217167101, 'critic_loss': 445.9462778191817, 'actor_loss': 81.60898819862054, 'time_step': 0.01829224859761913, 'td_error': 67.42233749144411, 'init_value': -127.84877014160156, 'ave_value': -72.5575491957796} step=8550
2022-04-20 19:59.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.02 [info     ] CQL_20220420195712: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003585885142722325, 'time_algorithm_update': 0.017945380935892027, 'temp_loss': -0.12710365723295694, 'temp': 0.7840632717860373, 'alpha_loss': 7.697515799985294, 'alpha': 0.5517341048396819, 'critic_loss': 457.7517842075281, 'actor_loss': 82.92195232012118, 'time_step': 0.018398711555882505, 'td_error': 69.28863491773929, 'init_value': -130.8719024658203, 'ave_value': -74.23254860713675} step=8892
2022-04-20 20:00.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.09 [info     ] CQL_20220420195712: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035244052173101417, 'time_algorithm_update': 0.017983647814968175, 'temp_loss': -0.09814598536042617, 'temp': 0.7910098583377593, 'alpha_loss': 7.505574311429297, 'alpha': 0.534448008091129, 'critic_loss': 471.18354886595966, 'actor_loss': 84.08544076395313, 'time_step': 0.01843764907435367, 'td_error': 66.16150338969838, 'init_value': -128.89697265625, 'ave_value': -75.12580032601281} step=9234
2022-04-20 20:00.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.15 [info     ] CQL_20220420195712: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035844072263840345, 'time_algorithm_update': 0.018050968995568347, 'temp_loss': -0.09157986848544307, 'temp': 0.7969501095208508, 'alpha_loss': 7.312843138711495, 'alpha': 0.5177380355478031, 'critic_loss': 484.1624656811095, 'actor_loss': 85.20045531423469, 'time_step': 0.018508282321238377, 'td_error': 69.91590480272305, 'init_value': -132.46669006347656, 'ave_value': -75.56704135450694} step=9576
2022-04-20 20:00.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.22 [info     ] CQL_20220420195712: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000358690295303077, 'time_algorithm_update': 0.01802414690541942, 'temp_loss': -0.09217825931175584, 'temp': 0.8035204035845417, 'alpha_loss': 7.174364534734982, 'alpha': 0.5014337811902253, 'critic_loss': 495.7968066477636, 'actor_loss': 86.34079028570164, 'time_step': 0.018483115218536197, 'td_error': 65.98885840877243, 'init_value': -130.23587036132812, 'ave_value': -75.50111737486748} step=9918
2022-04-20 20:00.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.28 [info     ] CQL_20220420195712: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003540083678842288, 'time_algorithm_update': 0.01801730735957274, 'temp_loss': -0.09604471595685558, 'temp': 0.8098816986669574, 'alpha_loss': 7.005663736521849, 'alpha': 0.48574152815411664, 'critic_loss': 507.2609200282404, 'actor_loss': 87.29389141595851, 'time_step': 0.018467592217071713, 'td_error': 69.74485617358323, 'init_value': -133.73199462890625, 'ave_value': -76.82957826885823} step=10260
2022-04-20 20:00.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.35 [info     ] CQL_20220420195712: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.000349829071446469, 'time_algorithm_update': 0.017928202946980793, 'temp_loss': -0.09910082073108843, 'temp': 0.8184630776706495, 'alpha_loss': 6.749155469805176, 'alpha': 0.4705816125660612, 'critic_loss': 515.9593073973182, 'actor_loss': 88.14093896520068, 'time_step': 0.018378132965132507, 'td_error': 67.09427185665099, 'init_value': -137.06141662597656, 'ave_value': -78.59583740937414} step=10602
2022-04-20 20:00.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.41 [info     ] CQL_20220420195712: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00036007549330505014, 'time_algorithm_update': 0.017991346922534252, 'temp_loss': -0.05504860415270454, 'temp': 0.8236238738598182, 'alpha_loss': 6.554688526175872, 'alpha': 0.456219666534, 'critic_loss': 524.7984164053934, 'actor_loss': 88.93561422336869, 'time_step': 0.018451017943042065, 'td_error': 70.20790650195015, 'init_value': -137.4959716796875, 'ave_value': -78.47929728410668} step=10944
2022-04-20 20:00.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.48 [info     ] CQL_20220420195712: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00035548140431008145, 'time_algorithm_update': 0.0178690577111049, 'temp_loss': -0.03428294300510172, 'temp': 0.8271280866965913, 'alpha_loss': 6.397865352574845, 'alpha': 0.4421833004170691, 'critic_loss': 534.3858479282312, 'actor_loss': 89.7337583798414, 'time_step': 0.018321841083772, 'td_error': 68.83931000687443, 'init_value': -137.52841186523438, 'ave_value': -79.7854823121603} step=11286
2022-04-20 20:00.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:00.54 [info     ] CQL_20220420195712: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00036218431260850694, 'time_algorithm_update': 0.017885967304832058, 'temp_loss': -0.03631956887306177, 'temp': 0.8293722163864047, 'alpha_loss': 6.056983604765775, 'alpha': 0.4286932797111266, 'critic_loss': 543.3601507889597, 'actor_loss': 90.37599293669761, 'time_step': 0.01834822049614979, 'td_error': 68.43072035508526, 'init_value': -139.18991088867188, 'ave_value': -80.18539151471612} step=11628
2022-04-20 20:00.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.01 [info     ] CQL_20220420195712: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003567104451140465, 'time_algorithm_update': 0.018105713944686085, 'temp_loss': 0.026714486372248644, 'temp': 0.8301139749281587, 'alpha_loss': 5.968147404709755, 'alpha': 0.41579810856727134, 'critic_loss': 552.0166124488875, 'actor_loss': 91.14427280983729, 'time_step': 0.018564883728473508, 'td_error': 67.58595722924805, 'init_value': -138.20018005371094, 'ave_value': -80.35142418198623} step=11970
2022-04-20 20:01.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.08 [info     ] CQL_20220420195712: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003458247547261199, 'time_algorithm_update': 0.017740313072650754, 'temp_loss': 0.03227022636071806, 'temp': 0.8266697051929451, 'alpha_loss': 5.73132394350063, 'alpha': 0.40319765110810596, 'critic_loss': 561.3539970353332, 'actor_loss': 91.7988181532475, 'time_step': 0.01818368030570404, 'td_error': 70.50706294216421, 'init_value': -141.23240661621094, 'ave_value': -82.09976144136743} step=12312
2022-04-20 20:01.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.14 [info     ] CQL_20220420195712: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.00034760173998380964, 'time_algorithm_update': 0.017789267657095927, 'temp_loss': 0.02915687001805905, 'temp': 0.8243403309269955, 'alpha_loss': 5.54415494029285, 'alpha': 0.3909794912177917, 'critic_loss': 567.1209461591397, 'actor_loss': 92.31597331532261, 'time_step': 0.018237851516545167, 'td_error': 65.60501762592791, 'init_value': -140.1368865966797, 'ave_value': -81.82880596156056} step=12654
2022-04-20 20:01.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.20 [info     ] CQL_20220420195712: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003468997297231217, 'time_algorithm_update': 0.017526646803694163, 'temp_loss': 0.029711668510316758, 'temp': 0.8214465548769075, 'alpha_loss': 5.390211182031018, 'alpha': 0.37927425439246215, 'critic_loss': 573.9898007933856, 'actor_loss': 92.84453268218459, 'time_step': 0.017976731584783186, 'td_error': 65.57952654679026, 'init_value': -140.15908813476562, 'ave_value': -82.44158755133013} step=12996
2022-04-20 20:01.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.27 [info     ] CQL_20220420195712: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003563465430722599, 'time_algorithm_update': 0.01769094927269116, 'temp_loss': 0.03273891469147819, 'temp': 0.8199248589270296, 'alpha_loss': 5.192760646691796, 'alpha': 0.3677135129943926, 'critic_loss': 579.482129637958, 'actor_loss': 93.20535911872373, 'time_step': 0.018146363615292555, 'td_error': 68.38033120963385, 'init_value': -140.60171508789062, 'ave_value': -83.25263911387943} step=13338
2022-04-20 20:01.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.33 [info     ] CQL_20220420195712: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003541178173489041, 'time_algorithm_update': 0.01758601651554219, 'temp_loss': 0.03978766092475046, 'temp': 0.8179283452312849, 'alpha_loss': 5.085842145116706, 'alpha': 0.35659952444291254, 'critic_loss': 583.3181629738613, 'actor_loss': 93.60475682933428, 'time_step': 0.018040169749343603, 'td_error': 70.90598774509783, 'init_value': -143.02427673339844, 'ave_value': -84.38274719115313} step=13680
2022-04-20 20:01.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.40 [info     ] CQL_20220420195712: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00034890397947434095, 'time_algorithm_update': 0.017681543590032565, 'temp_loss': 0.061981833539903164, 'temp': 0.8133908933714816, 'alpha_loss': 4.914491698058725, 'alpha': 0.34581312746332404, 'critic_loss': 588.8953303286904, 'actor_loss': 94.06958130507441, 'time_step': 0.018129996388976336, 'td_error': 62.87650646127233, 'init_value': -141.85226440429688, 'ave_value': -84.36920126672264} step=14022
2022-04-20 20:01.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.46 [info     ] CQL_20220420195712: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003524230934723079, 'time_algorithm_update': 0.01770956962429292, 'temp_loss': 0.03621815973402638, 'temp': 0.8099407264363696, 'alpha_loss': 4.654456482993232, 'alpha': 0.33548728842832887, 'critic_loss': 591.7826745998093, 'actor_loss': 94.24430833783066, 'time_step': 0.01815860034429539, 'td_error': 72.34675542755204, 'init_value': -143.85537719726562, 'ave_value': -85.6164499402704} step=14364
2022-04-20 20:01.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.53 [info     ] CQL_20220420195712: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035077786585043744, 'time_algorithm_update': 0.01805615982814142, 'temp_loss': 0.0742084133487783, 'temp': 0.8052417846799594, 'alpha_loss': 4.466623053913228, 'alpha': 0.32560486202700095, 'critic_loss': 594.3761731421041, 'actor_loss': 94.58582377294351, 'time_step': 0.01850415461244639, 'td_error': 66.87486161024951, 'init_value': -145.78468322753906, 'ave_value': -85.8109639457412} step=14706
2022-04-20 20:01.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:01.59 [info     ] CQL_20220420195712: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00034464730156792537, 'time_algorithm_update': 0.017502514939559132, 'temp_loss': 0.026332846581282324, 'temp': 0.8012861309692874, 'alpha_loss': 4.379456939753036, 'alpha': 0.3158941222736013, 'critic_loss': 594.5243681076674, 'actor_loss': 94.7108401694493, 'time_step': 0.01794493756099054, 'td_error': 65.6795573885045, 'init_value': -139.79257202148438, 'ave_value': -83.43745349981091} step=15048
2022-04-20 20:01.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.05 [info     ] CQL_20220420195712: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00032502517365572744, 'time_algorithm_update': 0.016044567900094373, 'temp_loss': 0.07105922194644373, 'temp': 0.7971545449125836, 'alpha_loss': 4.225939278017011, 'alpha': 0.306448220596676, 'critic_loss': 596.7398206922743, 'actor_loss': 94.92018198827554, 'time_step': 0.016457903454875387, 'td_error': 69.31469872458389, 'init_value': -143.0675048828125, 'ave_value': -86.35203935739693} step=15390
2022-04-20 20:02.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.11 [info     ] CQL_20220420195712: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003466459742763586, 'time_algorithm_update': 0.017710645993550617, 'temp_loss': 0.03851885753765441, 'temp': 0.7931829338882401, 'alpha_loss': 4.054952257557919, 'alpha': 0.29728545536074724, 'critic_loss': 596.7187612433182, 'actor_loss': 95.05902755469607, 'time_step': 0.01815667138462178, 'td_error': 66.41094922035467, 'init_value': -141.9548797607422, 'ave_value': -84.938844136291} step=15732
2022-04-20 20:02.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.18 [info     ] CQL_20220420195712: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00034675472661068564, 'time_algorithm_update': 0.01782762795163874, 'temp_loss': 0.059160632871047794, 'temp': 0.7893907443473214, 'alpha_loss': 3.8975522434502317, 'alpha': 0.2885217562927837, 'critic_loss': 593.7949944211725, 'actor_loss': 95.05683182833488, 'time_step': 0.018270258317913925, 'td_error': 69.54226168388172, 'init_value': -140.97116088867188, 'ave_value': -85.04900451136065} step=16074
2022-04-20 20:02.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.24 [info     ] CQL_20220420195712: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003505756980494449, 'time_algorithm_update': 0.017778354081494068, 'temp_loss': 0.0726455623196841, 'temp': 0.784875835765872, 'alpha_loss': 3.7386608799995735, 'alpha': 0.27996267848893214, 'critic_loss': 593.0583373844972, 'actor_loss': 95.1386390150639, 'time_step': 0.018227612065990068, 'td_error': 61.010737745363734, 'init_value': -138.50009155273438, 'ave_value': -84.38315100273705} step=16416
2022-04-20 20:02.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.31 [info     ] CQL_20220420195712: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00034491639388234993, 'time_algorithm_update': 0.017790415830779494, 'temp_loss': 0.03807425666709392, 'temp': 0.7803935188996164, 'alpha_loss': 3.620038922767193, 'alpha': 0.2716708591981241, 'critic_loss': 588.8467415257504, 'actor_loss': 94.99693287185758, 'time_step': 0.018234055641798946, 'td_error': 64.58623179131536, 'init_value': -143.09896850585938, 'ave_value': -86.07477674670719} step=16758
2022-04-20 20:02.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:02.37 [info     ] CQL_20220420195712: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00035233804356982136, 'time_algorithm_update': 0.01752915368442647, 'temp_loss': 0.044643939630678524, 'temp': 0.777796513155887, 'alpha_loss': 3.4625432459931624, 'alpha': 0.26364561218267296, 'critic_loss': 586.1715187831231, 'actor_loss': 95.02080964205558, 'time_step': 0.017979672777722455, 'td_error': 64.73376271922817, 'init_value': -140.13868713378906, 'ave_value': -85.28280449465723} step=17100
2022-04-20 20:02.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420195712/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:02.38 [info     ] FQE_20220420200237: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014665902379047438, 'time_algorithm_update': 0.0020681835082640133, 'loss': 0.007904350343833577, 'time_step': 0.0022792528910809255, 'init_value': -0.3093109130859375, 'ave_value': -0.24642559910478357, 'soft_opc': nan} step=166




2022-04-20 20:02.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.38 [info     ] FQE_20220420200237: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014366442898669876, 'time_algorithm_update': 0.0019839772258896425, 'loss': 0.006492890449835503, 'time_step': 0.0021941949086016917, 'init_value': -0.4475920498371124, 'ave_value': -0.3252449850845444, 'soft_opc': nan} step=332




2022-04-20 20:02.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.39 [info     ] FQE_20220420200237: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014600696333919662, 'time_algorithm_update': 0.002078821860164045, 'loss': 0.0060305439120899125, 'time_step': 0.002289372754384236, 'init_value': -0.5302911996841431, 'ave_value': -0.37191912124420073, 'soft_opc': nan} step=498




2022-04-20 20:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.39 [info     ] FQE_20220420200237: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014557321387601187, 'time_algorithm_update': 0.002003307802131377, 'loss': 0.006133067366913949, 'time_step': 0.0022144676691078276, 'init_value': -0.631738543510437, 'ave_value': -0.4346703832390139, 'soft_opc': nan} step=664




2022-04-20 20:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.39 [info     ] FQE_20220420200237: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014390428382230093, 'time_algorithm_update': 0.002091874559241605, 'loss': 0.005744208751459527, 'time_step': 0.0023039579391479492, 'init_value': -0.6452368497848511, 'ave_value': -0.4250201507723211, 'soft_opc': nan} step=830




2022-04-20 20:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.40 [info     ] FQE_20220420200237: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001472521977252271, 'time_algorithm_update': 0.0020438188529876343, 'loss': 0.005514131415725113, 'time_step': 0.0022564178489776977, 'init_value': -0.6967365741729736, 'ave_value': -0.4613499106537249, 'soft_opc': nan} step=996




2022-04-20 20:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.40 [info     ] FQE_20220420200237: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014161345470382506, 'time_algorithm_update': 0.0020061171198465736, 'loss': 0.0053298382035243405, 'time_step': 0.0022132798849818216, 'init_value': -0.7340487241744995, 'ave_value': -0.4615804780039701, 'soft_opc': nan} step=1162




2022-04-20 20:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.41 [info     ] FQE_20220420200237: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014449889401355422, 'time_algorithm_update': 0.001987332321075072, 'loss': 0.004962054110715637, 'time_step': 0.0022027061646243177, 'init_value': -0.8005478382110596, 'ave_value': -0.4856530587063045, 'soft_opc': nan} step=1328




2022-04-20 20:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.41 [info     ] FQE_20220420200237: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014266910323177474, 'time_algorithm_update': 0.0020207439560488046, 'loss': 0.004780861330976014, 'time_step': 0.0022314743823315724, 'init_value': -0.8299866318702698, 'ave_value': -0.49649691389672257, 'soft_opc': nan} step=1494




2022-04-20 20:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.42 [info     ] FQE_20220420200237: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014530463391039744, 'time_algorithm_update': 0.0020509915179516896, 'loss': 0.0047925239420462265, 'time_step': 0.0022633190614631377, 'init_value': -0.9310001730918884, 'ave_value': -0.5456967658816358, 'soft_opc': nan} step=1660




2022-04-20 20:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.42 [info     ] FQE_20220420200237: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00014274522482630718, 'time_algorithm_update': 0.0020284652709960938, 'loss': 0.004779032960320902, 'time_step': 0.0022372711135680416, 'init_value': -0.973197340965271, 'ave_value': -0.5625314521762702, 'soft_opc': nan} step=1826




2022-04-20 20:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.42 [info     ] FQE_20220420200237: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014546262212546476, 'time_algorithm_update': 0.002063968095434717, 'loss': 0.004820449516900916, 'time_step': 0.0022766073066067985, 'init_value': -1.0139445066452026, 'ave_value': -0.5791566954950828, 'soft_opc': nan} step=1992




2022-04-20 20:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.43 [info     ] FQE_20220420200237: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014274378856980657, 'time_algorithm_update': 0.001969607479601021, 'loss': 0.004996283677636634, 'time_step': 0.0021807357489344583, 'init_value': -1.1324273347854614, 'ave_value': -0.6713387606766176, 'soft_opc': nan} step=2158




2022-04-20 20:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.43 [info     ] FQE_20220420200237: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001525505479559841, 'time_algorithm_update': 0.002063613340079066, 'loss': 0.0052140032471415685, 'time_step': 0.0022908779511968775, 'init_value': -1.1844364404678345, 'ave_value': -0.7102391534929609, 'soft_opc': nan} step=2324




2022-04-20 20:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.44 [info     ] FQE_20220420200237: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001412414642701666, 'time_algorithm_update': 0.0019682631435164488, 'loss': 0.005311176779747279, 'time_step': 0.0021742165806781814, 'init_value': -1.2095038890838623, 'ave_value': -0.699801503924804, 'soft_opc': nan} step=2490




2022-04-20 20:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.44 [info     ] FQE_20220420200237: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014419728014842574, 'time_algorithm_update': 0.002016238419406385, 'loss': 0.005600834303571989, 'time_step': 0.0022246593452361694, 'init_value': -1.29331636428833, 'ave_value': -0.7685947945760983, 'soft_opc': nan} step=2656




2022-04-20 20:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.44 [info     ] FQE_20220420200237: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014285007155085183, 'time_algorithm_update': 0.0019992733576211586, 'loss': 0.006131108294813389, 'time_step': 0.0022084741707307748, 'init_value': -1.350287675857544, 'ave_value': -0.7726597080762322, 'soft_opc': nan} step=2822




2022-04-20 20:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.45 [info     ] FQE_20220420200237: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014562779162303512, 'time_algorithm_update': 0.002041747771113752, 'loss': 0.006023269252576697, 'time_step': 0.0022591309375073537, 'init_value': -1.3975917100906372, 'ave_value': -0.8230334976788711, 'soft_opc': nan} step=2988




2022-04-20 20:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.45 [info     ] FQE_20220420200237: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001426489956407662, 'time_algorithm_update': 0.0020423926502825267, 'loss': 0.006515779625063381, 'time_step': 0.0022520860993718527, 'init_value': -1.4473989009857178, 'ave_value': -0.8515612398819612, 'soft_opc': nan} step=3154




2022-04-20 20:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.46 [info     ] FQE_20220420200237: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001429893884314112, 'time_algorithm_update': 0.0020144660788846305, 'loss': 0.006541014580948122, 'time_step': 0.0022296359740107893, 'init_value': -1.5396018028259277, 'ave_value': -0.8901713305681541, 'soft_opc': nan} step=3320




2022-04-20 20:02.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.46 [info     ] FQE_20220420200237: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014163212603833302, 'time_algorithm_update': 0.0019529727568109352, 'loss': 0.006912080673444791, 'time_step': 0.0021596500672489763, 'init_value': -1.579329490661621, 'ave_value': -0.9188760546883484, 'soft_opc': nan} step=3486




2022-04-20 20:02.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.46 [info     ] FQE_20220420200237: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001448263604956937, 'time_algorithm_update': 0.0020179533096681157, 'loss': 0.00751785729964073, 'time_step': 0.00223182052014822, 'init_value': -1.6589443683624268, 'ave_value': -0.9978533691248378, 'soft_opc': nan} step=3652




2022-04-20 20:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.47 [info     ] FQE_20220420200237: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014677105179752213, 'time_algorithm_update': 0.002016068941139313, 'loss': 0.007821938654138562, 'time_step': 0.0022345522800123834, 'init_value': -1.7478611469268799, 'ave_value': -1.0655087015952345, 'soft_opc': nan} step=3818




2022-04-20 20:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.47 [info     ] FQE_20220420200237: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014541522566094455, 'time_algorithm_update': 0.00201793894710311, 'loss': 0.007904982023889267, 'time_step': 0.002231042069124888, 'init_value': -1.7021737098693848, 'ave_value': -1.0224008329548278, 'soft_opc': nan} step=3984




2022-04-20 20:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.48 [info     ] FQE_20220420200237: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014068706926093045, 'time_algorithm_update': 0.002024302999657321, 'loss': 0.008353446611674526, 'time_step': 0.0022338743669440947, 'init_value': -1.7627761363983154, 'ave_value': -1.0823801769933723, 'soft_opc': nan} step=4150




2022-04-20 20:02.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.48 [info     ] FQE_20220420200237: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014359405241816877, 'time_algorithm_update': 0.001998250742992723, 'loss': 0.008741214184135378, 'time_step': 0.0022114199328135296, 'init_value': -1.8089120388031006, 'ave_value': -1.122600112328524, 'soft_opc': nan} step=4316




2022-04-20 20:02.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.49 [info     ] FQE_20220420200237: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014357107231415897, 'time_algorithm_update': 0.0020738653389804334, 'loss': 0.009329216243385011, 'time_step': 0.002286761640066124, 'init_value': -1.8707150220870972, 'ave_value': -1.1620350182593406, 'soft_opc': nan} step=4482




2022-04-20 20:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.49 [info     ] FQE_20220420200237: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001464751829583961, 'time_algorithm_update': 0.0020508651273796357, 'loss': 0.01009194481295971, 'time_step': 0.002265622816890119, 'init_value': -1.8759236335754395, 'ave_value': -1.1630693876360718, 'soft_opc': nan} step=4648




2022-04-20 20:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.49 [info     ] FQE_20220420200237: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014463677463761294, 'time_algorithm_update': 0.002003204391663333, 'loss': 0.01033524309105453, 'time_step': 0.0022159743021769695, 'init_value': -1.953129529953003, 'ave_value': -1.25125733010538, 'soft_opc': nan} step=4814




2022-04-20 20:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.50 [info     ] FQE_20220420200237: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001432220619845103, 'time_algorithm_update': 0.002000647855092244, 'loss': 0.010806799300312322, 'time_step': 0.0022147563566644506, 'init_value': -1.972998857498169, 'ave_value': -1.2429663748891504, 'soft_opc': nan} step=4980




2022-04-20 20:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.50 [info     ] FQE_20220420200237: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014721485505621117, 'time_algorithm_update': 0.0020091591111148694, 'loss': 0.009088018960291407, 'time_step': 0.002228689480976886, 'init_value': -2.0142152309417725, 'ave_value': -1.2959432917470868, 'soft_opc': nan} step=5146




2022-04-20 20:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.51 [info     ] FQE_20220420200237: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014834231640919145, 'time_algorithm_update': 0.0020526762468269072, 'loss': 0.011646427286379263, 'time_step': 0.002271538757416139, 'init_value': -1.979621171951294, 'ave_value': -1.2847911568576689, 'soft_opc': nan} step=5312




2022-04-20 20:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.51 [info     ] FQE_20220420200237: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014408525214137802, 'time_algorithm_update': 0.002004530056413398, 'loss': 0.0120443190140818, 'time_step': 0.0022165373147252096, 'init_value': -1.997749924659729, 'ave_value': -1.2959625446212453, 'soft_opc': nan} step=5478




2022-04-20 20:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.51 [info     ] FQE_20220420200237: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014342170163809536, 'time_algorithm_update': 0.001997558467359428, 'loss': 0.012591596782566553, 'time_step': 0.0022061402539172805, 'init_value': -1.9700965881347656, 'ave_value': -1.2720409312830852, 'soft_opc': nan} step=5644




2022-04-20 20:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.52 [info     ] FQE_20220420200237: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014645507536738752, 'time_algorithm_update': 0.002019064972199589, 'loss': 0.01228447932278715, 'time_step': 0.0022337048886770226, 'init_value': -2.1230690479278564, 'ave_value': -1.3878269123064506, 'soft_opc': nan} step=5810




2022-04-20 20:02.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.52 [info     ] FQE_20220420200237: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014272942600480044, 'time_algorithm_update': 0.002045224948101733, 'loss': 0.013354198931413422, 'time_step': 0.0022554397583007812, 'init_value': -2.1266870498657227, 'ave_value': -1.3990739043365727, 'soft_opc': nan} step=5976




2022-04-20 20:02.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.53 [info     ] FQE_20220420200237: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014576710850359444, 'time_algorithm_update': 0.0021163555512945338, 'loss': 0.013656514376284084, 'time_step': 0.0023334529026445136, 'init_value': -2.1001460552215576, 'ave_value': -1.375552526571125, 'soft_opc': nan} step=6142




2022-04-20 20:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.53 [info     ] FQE_20220420200237: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00013407885310161546, 'time_algorithm_update': 0.0018679334456662097, 'loss': 0.014537018471318915, 'time_step': 0.002061537949435682, 'init_value': -2.0904502868652344, 'ave_value': -1.3939711395785346, 'soft_opc': nan} step=6308




2022-04-20 20:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.54 [info     ] FQE_20220420200237: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013433163424572312, 'time_algorithm_update': 0.0018332909388714526, 'loss': 0.014903766076439565, 'time_step': 0.002029571188501565, 'init_value': -2.1863203048706055, 'ave_value': -1.4411676782963647, 'soft_opc': nan} step=6474




2022-04-20 20:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.54 [info     ] FQE_20220420200237: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001314734838095056, 'time_algorithm_update': 0.0018294762416058276, 'loss': 0.015741654064405305, 'time_step': 0.0020243891750473574, 'init_value': -2.241997241973877, 'ave_value': -1.525547314079495, 'soft_opc': nan} step=6640




2022-04-20 20:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.54 [info     ] FQE_20220420200237: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00013225480734583843, 'time_algorithm_update': 0.0018308076513818949, 'loss': 0.016833595993945443, 'time_step': 0.0020212021218724997, 'init_value': -2.286477565765381, 'ave_value': -1.598773065132198, 'soft_opc': nan} step=6806




2022-04-20 20:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.55 [info     ] FQE_20220420200237: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00012924584997705667, 'time_algorithm_update': 0.0018120171075843904, 'loss': 0.01615038984988154, 'time_step': 0.002001818404140243, 'init_value': -2.2995166778564453, 'ave_value': -1.5689390398830443, 'soft_opc': nan} step=6972




2022-04-20 20:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.55 [info     ] FQE_20220420200237: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001255546707704843, 'time_algorithm_update': 0.0017399443201271884, 'loss': 0.017762354936966873, 'time_step': 0.0019233557115118187, 'init_value': -2.358675956726074, 'ave_value': -1.6451209916120715, 'soft_opc': nan} step=7138




2022-04-20 20:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.55 [info     ] FQE_20220420200237: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00013034889496952654, 'time_algorithm_update': 0.0018367422632424228, 'loss': 0.0186404888181532, 'time_step': 0.002027062048394996, 'init_value': -2.420957088470459, 'ave_value': -1.6972648488139515, 'soft_opc': nan} step=7304




2022-04-20 20:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.56 [info     ] FQE_20220420200237: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00013557255986225173, 'time_algorithm_update': 0.0018740605158978198, 'loss': 0.019449469106025005, 'time_step': 0.002071662121508495, 'init_value': -2.360962390899658, 'ave_value': -1.634038788184255, 'soft_opc': nan} step=7470




2022-04-20 20:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.56 [info     ] FQE_20220420200237: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012884800692638718, 'time_algorithm_update': 0.0017793465809649732, 'loss': 0.019446234863969576, 'time_step': 0.001967876790517784, 'init_value': -2.3865387439727783, 'ave_value': -1.6509518596050037, 'soft_opc': nan} step=7636




2022-04-20 20:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.56 [info     ] FQE_20220420200237: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00011070377855415803, 'time_algorithm_update': 0.0014832737934158508, 'loss': 0.019461233459878713, 'time_step': 0.001645747437534562, 'init_value': -2.276772975921631, 'ave_value': -1.5548752258162577, 'soft_opc': nan} step=7802




2022-04-20 20:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.57 [info     ] FQE_20220420200237: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00011689260781529438, 'time_algorithm_update': 0.001648951725787427, 'loss': 0.020737956844674176, 'time_step': 0.0018210123820477221, 'init_value': -2.374878168106079, 'ave_value': -1.6156972447745837, 'soft_opc': nan} step=7968




2022-04-20 20:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.57 [info     ] FQE_20220420200237: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00013009324131241763, 'time_algorithm_update': 0.0018587083701627801, 'loss': 0.021126813440392894, 'time_step': 0.00204676748758339, 'init_value': -2.4111642837524414, 'ave_value': -1.6632410686152677, 'soft_opc': nan} step=8134




2022-04-20 20:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:02.58 [info     ] FQE_20220420200237: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00013105840568082878, 'time_algorithm_update': 0.0018917293433683465, 'loss': 0.021761083211020445, 'time_step': 0.0020919521170926383, 'init_value': -2.457563638687134, 'ave_value': -1.700076852359616, 'soft_opc': nan} step=8300




2022-04-20 20:02.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200237/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:02.58 [debug    ] RoundIterator is selected.
2022-04-20 20:02.58 [info     ] Directory is created at d3rlpy_logs/FQE_20220420200258
2022-04-20 20:02.58 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:02.58 [debug    ] Building models...
2022-04-20 20:02.58 [debug    ] Models have been built.
2022-04-20 20:02.58 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420200258/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:02.59 [info     ] FQE_20220420200258: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013152388639228288, 'time_algorithm_update': 0.0017693853655526804, 'loss': 0.025370232216732274, 'time_step': 0.0019619534181994063, 'init_value': -1.0919060707092285, 'ave_value': -1.0767418634716992, 'soft_opc': nan} step=344




2022-04-20 20:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:02.59 [info     ] FQE_20220420200258: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014084646868151287, 'time_algorithm_update': 0.0018870483997256258, 'loss': 0.024125320173605063, 'time_step': 0.0020902849907098813, 'init_value': -1.9220530986785889, 'ave_value': -1.8422399461403622, 'soft_opc': nan} step=688




2022-04-20 20:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.00 [info     ] FQE_20220420200258: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00014822011770204057, 'time_algorithm_update': 0.0020409628402355104, 'loss': 0.027513647332874147, 'time_step': 0.002259802679682887, 'init_value': -2.963343620300293, 'ave_value': -2.7494776654954967, 'soft_opc': nan} step=1032




2022-04-20 20:03.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.01 [info     ] FQE_20220420200258: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001500824163126391, 'time_algorithm_update': 0.00199402556862942, 'loss': 0.03003125441758785, 'time_step': 0.0022136236346045205, 'init_value': -3.6107430458068848, 'ave_value': -3.303933240943127, 'soft_opc': nan} step=1376




2022-04-20 20:03.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.02 [info     ] FQE_20220420200258: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00014982043310653333, 'time_algorithm_update': 0.0020400978798090024, 'loss': 0.037357001714300106, 'time_step': 0.002259969018226446, 'init_value': -4.550937652587891, 'ave_value': -4.150891047296626, 'soft_opc': nan} step=1720




2022-04-20 20:03.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.03 [info     ] FQE_20220420200258: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015315898629122003, 'time_algorithm_update': 0.0020249257254046065, 'loss': 0.046466963671602655, 'time_step': 0.0022463029207185257, 'init_value': -5.030458450317383, 'ave_value': -4.60196406152435, 'soft_opc': nan} step=2064




2022-04-20 20:03.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.04 [info     ] FQE_20220420200258: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015022934869278308, 'time_algorithm_update': 0.002017501481743746, 'loss': 0.058372438235511616, 'time_step': 0.002237978369690651, 'init_value': -5.700034141540527, 'ave_value': -5.288398464854706, 'soft_opc': nan} step=2408




2022-04-20 20:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.04 [info     ] FQE_20220420200258: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015335096869357798, 'time_algorithm_update': 0.0020042020221089207, 'loss': 0.07056304804850803, 'time_step': 0.002227894095487373, 'init_value': -6.008436679840088, 'ave_value': -5.606406548466148, 'soft_opc': nan} step=2752




2022-04-20 20:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.05 [info     ] FQE_20220420200258: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001513244107712147, 'time_algorithm_update': 0.001986552116482757, 'loss': 0.08547632486646085, 'time_step': 0.002208490704381189, 'init_value': -6.549352169036865, 'ave_value': -6.251391417262153, 'soft_opc': nan} step=3096




2022-04-20 20:03.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.06 [info     ] FQE_20220420200258: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015265719835148302, 'time_algorithm_update': 0.0020113317079322283, 'loss': 0.10326676016002027, 'time_step': 0.002230654622233191, 'init_value': -6.922746658325195, 'ave_value': -6.7946887297220195, 'soft_opc': nan} step=3440




2022-04-20 20:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.07 [info     ] FQE_20220420200258: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001509286636529967, 'time_algorithm_update': 0.0020442556503207183, 'loss': 0.12015272643373803, 'time_step': 0.0022644261981165687, 'init_value': -7.13047981262207, 'ave_value': -7.156289080329038, 'soft_opc': nan} step=3784




2022-04-20 20:03.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.08 [info     ] FQE_20220420200258: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001543316730233126, 'time_algorithm_update': 0.002009929612625477, 'loss': 0.14101912559898094, 'time_step': 0.002233467822851137, 'init_value': -7.282430648803711, 'ave_value': -7.501163772809076, 'soft_opc': nan} step=4128




2022-04-20 20:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.09 [info     ] FQE_20220420200258: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00014899775039317996, 'time_algorithm_update': 0.0020285075487092483, 'loss': 0.1624542472039371, 'time_step': 0.0022474298643511397, 'init_value': -7.590660095214844, 'ave_value': -7.952603398898599, 'soft_opc': nan} step=4472




2022-04-20 20:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.09 [info     ] FQE_20220420200258: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015445088231286338, 'time_algorithm_update': 0.002022818077442258, 'loss': 0.1822506692206357, 'time_step': 0.0022466057954832566, 'init_value': -7.820716381072998, 'ave_value': -8.360552383145436, 'soft_opc': nan} step=4816




2022-04-20 20:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.10 [info     ] FQE_20220420200258: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015293373618015024, 'time_algorithm_update': 0.0020708441734313965, 'loss': 0.20446739774630512, 'time_step': 0.002293996339620546, 'init_value': -7.9410552978515625, 'ave_value': -8.56551810165003, 'soft_opc': nan} step=5160




2022-04-20 20:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.11 [info     ] FQE_20220420200258: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015160372090894123, 'time_algorithm_update': 0.002048740553301434, 'loss': 0.22360085054344042, 'time_step': 0.002271845590236575, 'init_value': -8.468379974365234, 'ave_value': -9.176567414596358, 'soft_opc': nan} step=5504




2022-04-20 20:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.12 [info     ] FQE_20220420200258: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015194679415503213, 'time_algorithm_update': 0.002029182605965193, 'loss': 0.2477611267484378, 'time_step': 0.0022503484127133393, 'init_value': -8.525827407836914, 'ave_value': -9.350993885126737, 'soft_opc': nan} step=5848




2022-04-20 20:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.13 [info     ] FQE_20220420200258: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015260521755662074, 'time_algorithm_update': 0.0020205711209496787, 'loss': 0.2623867586330879, 'time_step': 0.002245474000309789, 'init_value': -8.811552047729492, 'ave_value': -9.595475810855577, 'soft_opc': nan} step=6192




2022-04-20 20:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.14 [info     ] FQE_20220420200258: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001519502595413563, 'time_algorithm_update': 0.0020577727362167003, 'loss': 0.2907527711697269, 'time_step': 0.0022790695345678994, 'init_value': -9.130102157592773, 'ave_value': -9.90828772296978, 'soft_opc': nan} step=6536




2022-04-20 20:03.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.15 [info     ] FQE_20220420200258: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001531215601189192, 'time_algorithm_update': 0.0020415554212969405, 'loss': 0.3110497215773564, 'time_step': 0.0022672179133393046, 'init_value': -9.3563232421875, 'ave_value': -10.008686351125036, 'soft_opc': nan} step=6880




2022-04-20 20:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.15 [info     ] FQE_20220420200258: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00014887161033098087, 'time_algorithm_update': 0.0020140721354373666, 'loss': 0.3312162908097339, 'time_step': 0.0022333499997161154, 'init_value': -9.992162704467773, 'ave_value': -10.58240134928644, 'soft_opc': nan} step=7224




2022-04-20 20:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.16 [info     ] FQE_20220420200258: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00014965755994929823, 'time_algorithm_update': 0.0020102262496948242, 'loss': 0.3527708808826499, 'time_step': 0.0022318543389786122, 'init_value': -10.188509941101074, 'ave_value': -10.839869087120636, 'soft_opc': nan} step=7568




2022-04-20 20:03.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.17 [info     ] FQE_20220420200258: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001498751862104549, 'time_algorithm_update': 0.0019772468611251475, 'loss': 0.36720243438558525, 'time_step': 0.0021963861099509306, 'init_value': -10.246720314025879, 'ave_value': -10.934081885419987, 'soft_opc': nan} step=7912




2022-04-20 20:03.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.18 [info     ] FQE_20220420200258: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001530737377876459, 'time_algorithm_update': 0.0020142079785812734, 'loss': 0.3892808447288739, 'time_step': 0.0022377129210982213, 'init_value': -10.859602928161621, 'ave_value': -11.338980792222863, 'soft_opc': nan} step=8256




2022-04-20 20:03.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.19 [info     ] FQE_20220420200258: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015258650447047033, 'time_algorithm_update': 0.0020346551440482918, 'loss': 0.40521245409712886, 'time_step': 0.0022608312063438947, 'init_value': -11.26217269897461, 'ave_value': -11.586551030054018, 'soft_opc': nan} step=8600




2022-04-20 20:03.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.20 [info     ] FQE_20220420200258: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015229056047838787, 'time_algorithm_update': 0.0019963161889896834, 'loss': 0.43807119463548755, 'time_step': 0.0022202023240022882, 'init_value': -11.576546669006348, 'ave_value': -11.648863540092265, 'soft_opc': nan} step=8944




2022-04-20 20:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.20 [info     ] FQE_20220420200258: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015226006507873535, 'time_algorithm_update': 0.002023793237153874, 'loss': 0.4496787867920343, 'time_step': 0.002248383538667546, 'init_value': -11.904529571533203, 'ave_value': -12.129159001380138, 'soft_opc': nan} step=9288




2022-04-20 20:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.21 [info     ] FQE_20220420200258: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015314789705498275, 'time_algorithm_update': 0.00204092402790868, 'loss': 0.48174675047224347, 'time_step': 0.0022668831570203914, 'init_value': -13.330324172973633, 'ave_value': -13.290316867294747, 'soft_opc': nan} step=9632




2022-04-20 20:03.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.22 [info     ] FQE_20220420200258: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015207570652629054, 'time_algorithm_update': 0.0020051785679750665, 'loss': 0.5067985628644914, 'time_step': 0.0022270845812420513, 'init_value': -13.325911521911621, 'ave_value': -13.17875060822714, 'soft_opc': nan} step=9976




2022-04-20 20:03.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.23 [info     ] FQE_20220420200258: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00014893329420755076, 'time_algorithm_update': 0.0019786593525908712, 'loss': 0.5156390235521072, 'time_step': 0.002197371665821519, 'init_value': -13.879670143127441, 'ave_value': -13.659974314462024, 'soft_opc': nan} step=10320




2022-04-20 20:03.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.24 [info     ] FQE_20220420200258: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001505329165347787, 'time_algorithm_update': 0.002021801333094752, 'loss': 0.5424615245658991, 'time_step': 0.0022419836632041044, 'init_value': -14.319328308105469, 'ave_value': -13.9402408971468, 'soft_opc': nan} step=10664




2022-04-20 20:03.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.25 [info     ] FQE_20220420200258: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001518033271612123, 'time_algorithm_update': 0.0020784569341082905, 'loss': 0.5625038552964323, 'time_step': 0.002299988685652267, 'init_value': -14.548227310180664, 'ave_value': -14.03801991859837, 'soft_opc': nan} step=11008




2022-04-20 20:03.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.26 [info     ] FQE_20220420200258: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001504691534264143, 'time_algorithm_update': 0.002034943464190461, 'loss': 0.5702010589911667, 'time_step': 0.002255942932395048, 'init_value': -14.63565444946289, 'ave_value': -14.016964403407329, 'soft_opc': nan} step=11352




2022-04-20 20:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.26 [info     ] FQE_20220420200258: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00014954320220060126, 'time_algorithm_update': 0.0020098422848901084, 'loss': 0.5880001101339626, 'time_step': 0.0022300862988760303, 'init_value': -14.364066123962402, 'ave_value': -13.861550473256523, 'soft_opc': nan} step=11696




2022-04-20 20:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.27 [info     ] FQE_20220420200258: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001522060050520786, 'time_algorithm_update': 0.0020626485347747803, 'loss': 0.5957677526572762, 'time_step': 0.0022846786088721698, 'init_value': -14.947458267211914, 'ave_value': -14.370537084649811, 'soft_opc': nan} step=12040




2022-04-20 20:03.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.28 [info     ] FQE_20220420200258: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001498266708019168, 'time_algorithm_update': 0.00202348273853923, 'loss': 0.6089283352132974, 'time_step': 0.002242740503577299, 'init_value': -15.257271766662598, 'ave_value': -14.713275872701193, 'soft_opc': nan} step=12384




2022-04-20 20:03.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.29 [info     ] FQE_20220420200258: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015169174172157464, 'time_algorithm_update': 0.0020290412182031674, 'loss': 0.6428730979041998, 'time_step': 0.0022491057251774987, 'init_value': -15.420193672180176, 'ave_value': -14.885834697985535, 'soft_opc': nan} step=12728




2022-04-20 20:03.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.30 [info     ] FQE_20220420200258: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001486650733060615, 'time_algorithm_update': 0.001992614463318226, 'loss': 0.6533073268040235, 'time_step': 0.002208272385042767, 'init_value': -15.78715991973877, 'ave_value': -15.19636106796999, 'soft_opc': nan} step=13072




2022-04-20 20:03.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.31 [info     ] FQE_20220420200258: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001495529052823089, 'time_algorithm_update': 0.002040499864622604, 'loss': 0.6631778386139939, 'time_step': 0.0022611909134443416, 'init_value': -15.954399108886719, 'ave_value': -15.471376018665799, 'soft_opc': nan} step=13416




2022-04-20 20:03.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.31 [info     ] FQE_20220420200258: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00014818823614785837, 'time_algorithm_update': 0.002002088136451189, 'loss': 0.6811106908026823, 'time_step': 0.0022194434043972993, 'init_value': -16.29098892211914, 'ave_value': -15.759568154563507, 'soft_opc': nan} step=13760




2022-04-20 20:03.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.32 [info     ] FQE_20220420200258: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015490969946218091, 'time_algorithm_update': 0.0020603988059731417, 'loss': 0.6920774714956277, 'time_step': 0.0022862622904223067, 'init_value': -16.85599136352539, 'ave_value': -16.127644896780787, 'soft_opc': nan} step=14104




2022-04-20 20:03.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.33 [info     ] FQE_20220420200258: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00014902755271556766, 'time_algorithm_update': 0.0019962579704994377, 'loss': 0.7189568172880384, 'time_step': 0.0022157971249070277, 'init_value': -16.727096557617188, 'ave_value': -15.772315979881531, 'soft_opc': nan} step=14448




2022-04-20 20:03.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.34 [info     ] FQE_20220420200258: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015189204105111055, 'time_algorithm_update': 0.0020561918269756227, 'loss': 0.7216537513791822, 'time_step': 0.0022771025812903115, 'init_value': -16.89433479309082, 'ave_value': -16.170200795433608, 'soft_opc': nan} step=14792




2022-04-20 20:03.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.35 [info     ] FQE_20220420200258: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00014973379844842956, 'time_algorithm_update': 0.0020520000956779304, 'loss': 0.7369207022819928, 'time_step': 0.0022727549076080322, 'init_value': -17.525293350219727, 'ave_value': -16.94372173137963, 'soft_opc': nan} step=15136




2022-04-20 20:03.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.36 [info     ] FQE_20220420200258: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00014962567839511606, 'time_algorithm_update': 0.0020327055177023243, 'loss': 0.7678270977542757, 'time_step': 0.002253907364468242, 'init_value': -17.74864387512207, 'ave_value': -16.96828320892276, 'soft_opc': nan} step=15480




2022-04-20 20:03.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.36 [info     ] FQE_20220420200258: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015274937762770543, 'time_algorithm_update': 0.002122786155966825, 'loss': 0.7731819551568045, 'time_step': 0.0023495970770370128, 'init_value': -17.736743927001953, 'ave_value': -17.12903552974851, 'soft_opc': nan} step=15824




2022-04-20 20:03.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.37 [info     ] FQE_20220420200258: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015193917030511902, 'time_algorithm_update': 0.002067267201667608, 'loss': 0.7744151982262211, 'time_step': 0.0022918997808944346, 'init_value': -18.215730667114258, 'ave_value': -17.664133794827237, 'soft_opc': nan} step=16168




2022-04-20 20:03.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.38 [info     ] FQE_20220420200258: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015015796173450558, 'time_algorithm_update': 0.002094432365062625, 'loss': 0.7726994745574112, 'time_step': 0.0023163473883340527, 'init_value': -18.12687873840332, 'ave_value': -17.56828969894816, 'soft_opc': nan} step=16512




2022-04-20 20:03.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.39 [info     ] FQE_20220420200258: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00014778209287066792, 'time_algorithm_update': 0.002003947662752728, 'loss': 0.7542907336705126, 'time_step': 0.002220537773398466, 'init_value': -18.145246505737305, 'ave_value': -17.61635846303071, 'soft_opc': nan} step=16856




2022-04-20 20:03.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:03.40 [info     ] FQE_20220420200258: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00014697812324346497, 'time_algorithm_update': 0.0019562923631002735, 'loss': 0.7572239746534547, 'time_step': 0.0021713084952775822, 'init_value': -18.10970115661621, 'ave_value': -17.71951909730429, 'soft_opc': nan} step=17200




2022-04-20 20:03.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200258/model_17200.pt
search iteration:  33
using hyper params:  [0.0035929126138241757, 0.00714353659400976, 5.2584765089210976e-05, 5]
2022-04-20 20:03.40 [debug    ] RoundIterator is selected.
2022-04-20 20:03.40 [info     ] Directory is created at d3rlpy_logs/CQL_20220420200340
2022-04-20 20:03.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:03.40 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:03.40 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420200340/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0035929126138241757, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, '

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:03.46 [info     ] CQL_20220420200340: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003454650354664228, 'time_algorithm_update': 0.0176912720440424, 'temp_loss': 4.616103278266059, 'temp': 0.9911193727401265, 'alpha_loss': -14.9025413306833, 'alpha': 1.0162975003844814, 'critic_loss': 34.98572973061723, 'actor_loss': 3.725668700989227, 'time_step': 0.018133080493636995, 'td_error': 4.204334883190616, 'init_value': -10.11485767364502, 'ave_value': -6.0702165704504125} step=342
2022-04-20 20:03.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:03.53 [info     ] CQL_20220420200340: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003514854531539114, 'time_algorithm_update': 0.018042062458239104, 'temp_loss': 3.8247249363458646, 'temp': 0.9744664446303719, 'alpha_loss': -6.7565082932076255, 'alpha': 1.0393945965153433, 'critic_loss': 21.648429006163838, 'actor_loss': 9.084270678068462, 'time_step': 0.018495423054834554, 'td_error': 4.482993823076062, 'init_value': -17.20229721069336, 'ave_value': -10.155175986493527} step=684
2022-04-20 20:03.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:03.59 [info     ] CQL_20220420200340: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003567432102404143, 'time_algorithm_update': 0.018134649734050906, 'temp_loss': 3.140696018062837, 'temp': 0.9599552365422946, 'alpha_loss': -3.17935255728662, 'alpha': 1.055345172421974, 'critic_loss': 35.51209846853513, 'actor_loss': 14.999748388926188, 'time_step': 0.018589840297810516, 'td_error': 7.60566543311428, 'init_value': -27.076953887939453, 'ave_value': -15.306185814188542} step=1026
2022-04-20 20:03.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.06 [info     ] CQL_20220420200340: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035331054040563036, 'time_algorithm_update': 0.017872120204724763, 'temp_loss': 2.5948157624194494, 'temp': 0.9467635722885355, 'alpha_loss': 0.16631942386711723, 'alpha': 1.0614956596441436, 'critic_loss': 59.271531690630994, 'actor_loss': 21.050602017787465, 'time_step': 0.01832292930424562, 'td_error': 10.896604401420722, 'init_value': -35.32373046875, 'ave_value': -20.431880909141388} step=1368
2022-04-20 20:04.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.13 [info     ] CQL_20220420200340: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003496338749489589, 'time_algorithm_update': 0.01838054921891954, 'temp_loss': 2.1521334167112385, 'temp': 0.9347445703389352, 'alpha_loss': 3.1145663447460237, 'alpha': 1.052176780867995, 'critic_loss': 88.70995089882298, 'actor_loss': 26.781594962404487, 'time_step': 0.018830874509978713, 'td_error': 14.813612759507965, 'init_value': -43.79314422607422, 'ave_value': -24.891132865913278} step=1710
2022-04-20 20:04.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.19 [info     ] CQL_20220420200340: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00035602656024241307, 'time_algorithm_update': 0.018537801608704683, 'temp_loss': 1.7305015109435857, 'temp': 0.9237998436417496, 'alpha_loss': 5.417174462686505, 'alpha': 1.0259920951218633, 'critic_loss': 121.5657465750711, 'actor_loss': 32.16096496582031, 'time_step': 0.01899638719726027, 'td_error': 19.341081815938832, 'init_value': -51.32573318481445, 'ave_value': -28.31198806515752} step=2052
2022-04-20 20:04.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.26 [info     ] CQL_20220420200340: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003486488297668814, 'time_algorithm_update': 0.017910293668334246, 'temp_loss': 1.401491903247889, 'temp': 0.9139703599333066, 'alpha_loss': 7.305728794538487, 'alpha': 0.9878949768710554, 'critic_loss': 155.81780644467003, 'actor_loss': 37.37160982723125, 'time_step': 0.018356182421857152, 'td_error': 24.62341456853963, 'init_value': -59.3017463684082, 'ave_value': -34.01481299919991} step=2394
2022-04-20 20:04.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.32 [info     ] CQL_20220420200340: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003494087018464741, 'time_algorithm_update': 0.017939044021026432, 'temp_loss': 1.115931918572264, 'temp': 0.9050497435338316, 'alpha_loss': 8.596129160875465, 'alpha': 0.9463709779301582, 'critic_loss': 193.5694079036601, 'actor_loss': 42.388987513313516, 'time_step': 0.018386391868368226, 'td_error': 31.056031353515802, 'init_value': -67.16146087646484, 'ave_value': -37.69951316857123} step=2736
2022-04-20 20:04.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.39 [info     ] CQL_20220420200340: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003503142741688511, 'time_algorithm_update': 0.01794230380253485, 'temp_loss': 0.8911237722556842, 'temp': 0.8970561261065522, 'alpha_loss': 9.421474012017947, 'alpha': 0.9062699006314863, 'critic_loss': 228.9774439850746, 'actor_loss': 46.98224161382307, 'time_step': 0.018388785813984117, 'td_error': 36.774471209629255, 'init_value': -74.3165512084961, 'ave_value': -42.15375151898812} step=3078
2022-04-20 20:04.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.45 [info     ] CQL_20220420200340: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003449331250107079, 'time_algorithm_update': 0.017669245513559084, 'temp_loss': 0.7271460280454124, 'temp': 0.8896061969779389, 'alpha_loss': 9.687381189469008, 'alpha': 0.8696950682771136, 'critic_loss': 267.70174501653304, 'actor_loss': 51.48574732061018, 'time_step': 0.01811141856232582, 'td_error': 40.6934442941067, 'init_value': -81.52277374267578, 'ave_value': -45.56715492618916} step=3420
2022-04-20 20:04.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.52 [info     ] CQL_20220420200340: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003455396284136856, 'time_algorithm_update': 0.01777057689532899, 'temp_loss': 0.5074103549013884, 'temp': 0.8831783841576493, 'alpha_loss': 10.121319938124271, 'alpha': 0.8359530559408734, 'critic_loss': 299.62177481846504, 'actor_loss': 55.31672265794542, 'time_step': 0.018213722440931533, 'td_error': 46.16271526437212, 'init_value': -87.31272888183594, 'ave_value': -50.07770739348488} step=3762
2022-04-20 20:04.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:04.58 [info     ] CQL_20220420200340: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00034805766323156524, 'time_algorithm_update': 0.017617619525619417, 'temp_loss': 0.3654853212892225, 'temp': 0.8780513679423527, 'alpha_loss': 10.19196591321488, 'alpha': 0.8044586603404486, 'critic_loss': 331.26781325312385, 'actor_loss': 59.083403481377495, 'time_step': 0.018064437553896542, 'td_error': 47.5812997348301, 'init_value': -92.05415344238281, 'ave_value': -52.95565244656828} step=4104
2022-04-20 20:04.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.05 [info     ] CQL_20220420200340: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00034778508526539943, 'time_algorithm_update': 0.017523403753314102, 'temp_loss': 0.24619065849273875, 'temp': 0.8738214922578711, 'alpha_loss': 10.093280238714831, 'alpha': 0.775430025587305, 'critic_loss': 361.06082537020853, 'actor_loss': 62.484240626731115, 'time_step': 0.017971154541997183, 'td_error': 52.77108672011637, 'init_value': -96.75809478759766, 'ave_value': -54.41152690597751} step=4446
2022-04-20 20:05.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.11 [info     ] CQL_20220420200340: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00035804196407920435, 'time_algorithm_update': 0.01833887337244045, 'temp_loss': 0.13409164348584518, 'temp': 0.870635838187926, 'alpha_loss': 10.241678633885076, 'alpha': 0.7481260304911095, 'critic_loss': 386.5641895316498, 'actor_loss': 65.49786198487756, 'time_step': 0.018798208376120406, 'td_error': 58.24653200938968, 'init_value': -106.26637268066406, 'ave_value': -59.457096112856455} step=4788
2022-04-20 20:05.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.19 [info     ] CQL_20220420200340: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003669331645407872, 'time_algorithm_update': 0.019972832579361766, 'temp_loss': 0.0665764751250458, 'temp': 0.868847394198702, 'alpha_loss': 9.968411018973903, 'alpha': 0.7221495836798908, 'critic_loss': 412.10058745445565, 'actor_loss': 68.3272374917192, 'time_step': 0.020445806938305236, 'td_error': 53.238536462110105, 'init_value': -106.68254089355469, 'ave_value': -59.96033766502464} step=5130
2022-04-20 20:05.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.26 [info     ] CQL_20220420200340: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00036661736449302985, 'time_algorithm_update': 0.01954440554680183, 'temp_loss': 0.0074922632240848235, 'temp': 0.8680870707272089, 'alpha_loss': 9.850321574517858, 'alpha': 0.6980084069291054, 'critic_loss': 432.1154309546041, 'actor_loss': 70.8534223099201, 'time_step': 0.020019952316730344, 'td_error': 53.180312414747405, 'init_value': -112.44700622558594, 'ave_value': -63.024873134981135} step=5472
2022-04-20 20:05.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.33 [info     ] CQL_20220420200340: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00036940937153777186, 'time_algorithm_update': 0.019239501646387647, 'temp_loss': -0.019674995369468515, 'temp': 0.868334927928378, 'alpha_loss': 9.639666346778647, 'alpha': 0.674932196823477, 'critic_loss': 451.764960751896, 'actor_loss': 73.26849945246825, 'time_step': 0.019710978569343077, 'td_error': 55.84016803895935, 'init_value': -117.8554458618164, 'ave_value': -65.64482693594587} step=5814
2022-04-20 20:05.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.39 [info     ] CQL_20220420200340: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003554800100493849, 'time_algorithm_update': 0.01857878729613901, 'temp_loss': -0.05487803651335818, 'temp': 0.8695844031913936, 'alpha_loss': 9.513165041717173, 'alpha': 0.6529049177964529, 'critic_loss': 471.4035267969321, 'actor_loss': 75.55758712723939, 'time_step': 0.01903206075144093, 'td_error': 60.765048865356285, 'init_value': -118.73445892333984, 'ave_value': -65.87363793026086} step=6156
2022-04-20 20:05.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.46 [info     ] CQL_20220420200340: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00034943867845144887, 'time_algorithm_update': 0.018177084058348895, 'temp_loss': -0.15259058648252, 'temp': 0.8726502378084506, 'alpha_loss': 9.536146974005895, 'alpha': 0.6315952396532248, 'critic_loss': 487.20465953447666, 'actor_loss': 77.39518576058728, 'time_step': 0.018626608346637926, 'td_error': 65.30342404872117, 'init_value': -125.0528335571289, 'ave_value': -69.41940598095591} step=6498
2022-04-20 20:05.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.53 [info     ] CQL_20220420200340: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00035101279877779775, 'time_algorithm_update': 0.017997575782195865, 'temp_loss': -0.14610945593500346, 'temp': 0.8783673879347349, 'alpha_loss': 9.281729859915393, 'alpha': 0.6109613866833915, 'critic_loss': 506.0799566793163, 'actor_loss': 79.47526813529389, 'time_step': 0.018441260209557605, 'td_error': 70.2211359275448, 'init_value': -128.6140594482422, 'ave_value': -72.20191653857427} step=6840
2022-04-20 20:05.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:05.59 [info     ] CQL_20220420200340: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035599030946430405, 'time_algorithm_update': 0.017531132837485152, 'temp_loss': -0.14924047609562413, 'temp': 0.88405177485176, 'alpha_loss': 8.999483441748815, 'alpha': 0.5913719412876152, 'critic_loss': 525.540095636022, 'actor_loss': 81.39591156808953, 'time_step': 0.017981938451354266, 'td_error': 65.86281721048297, 'init_value': -126.0206298828125, 'ave_value': -70.834564307867} step=7182
2022-04-20 20:05.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.05 [info     ] CQL_20220420200340: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00035019157922755906, 'time_algorithm_update': 0.017958827186049076, 'temp_loss': -0.13346635070983429, 'temp': 0.8896675660596256, 'alpha_loss': 8.870398847680343, 'alpha': 0.5726426271667258, 'critic_loss': 545.1334488182737, 'actor_loss': 83.23540735523603, 'time_step': 0.018406875649390864, 'td_error': 70.19800890803893, 'init_value': -130.22483825683594, 'ave_value': -73.854729065442} step=7524
2022-04-20 20:06.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.12 [info     ] CQL_20220420200340: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00035282533768324825, 'time_algorithm_update': 0.017612511651557788, 'temp_loss': -0.1751824345777345, 'temp': 0.8961220448128662, 'alpha_loss': 8.690682811346667, 'alpha': 0.5544792593222613, 'critic_loss': 558.2361402009662, 'actor_loss': 84.63361229255186, 'time_step': 0.018062853673745317, 'td_error': 62.354006462323106, 'init_value': -132.73037719726562, 'ave_value': -74.8857380009362} step=7866
2022-04-20 20:06.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.18 [info     ] CQL_20220420200340: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003497921235380117, 'time_algorithm_update': 0.018077045156244646, 'temp_loss': -0.18534457214089514, 'temp': 0.9045413305536348, 'alpha_loss': 8.416731968260647, 'alpha': 0.536964525081958, 'critic_loss': 571.8468998245329, 'actor_loss': 86.0354485539665, 'time_step': 0.018527352321914763, 'td_error': 68.83346182736143, 'init_value': -135.35073852539062, 'ave_value': -77.15614139030646} step=8208
2022-04-20 20:06.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.25 [info     ] CQL_20220420200340: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000355588762383712, 'time_algorithm_update': 0.01796713349414848, 'temp_loss': -0.18377467962689917, 'temp': 0.913406893523813, 'alpha_loss': 8.288667452962775, 'alpha': 0.5200959525958836, 'critic_loss': 582.2154704311438, 'actor_loss': 87.3217542994092, 'time_step': 0.018421049703631485, 'td_error': 66.81724090309072, 'init_value': -134.56756591796875, 'ave_value': -76.23377469006289} step=8550
2022-04-20 20:06.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.32 [info     ] CQL_20220420200340: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035750796223244473, 'time_algorithm_update': 0.018021513844094082, 'temp_loss': -0.13765665808725253, 'temp': 0.9205630568035862, 'alpha_loss': 7.957505383686713, 'alpha': 0.5038902238447066, 'critic_loss': 595.6176065366868, 'actor_loss': 88.55244204872533, 'time_step': 0.018482514292176008, 'td_error': 67.67530454262044, 'init_value': -139.0146942138672, 'ave_value': -78.81591457059187} step=8892
2022-04-20 20:06.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.38 [info     ] CQL_20220420200340: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035424957498472336, 'time_algorithm_update': 0.017970193896377294, 'temp_loss': -0.1266554066290458, 'temp': 0.927708820815672, 'alpha_loss': 7.763187828119735, 'alpha': 0.4883204468509607, 'critic_loss': 604.2413811042295, 'actor_loss': 89.554867638482, 'time_step': 0.01842037348719368, 'td_error': 67.85728528995426, 'init_value': -138.12977600097656, 'ave_value': -77.80821690774178} step=9234
2022-04-20 20:06.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.45 [info     ] CQL_20220420200340: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00036041917856673747, 'time_algorithm_update': 0.017877856193230166, 'temp_loss': -0.10536339745056211, 'temp': 0.9328818148688266, 'alpha_loss': 7.571776569935313, 'alpha': 0.47327000625997956, 'critic_loss': 613.6304597018058, 'actor_loss': 90.51691238224855, 'time_step': 0.01833597400732208, 'td_error': 72.37451808623125, 'init_value': -140.18008422851562, 'ave_value': -80.16334564771705} step=9576
2022-04-20 20:06.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.51 [info     ] CQL_20220420200340: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003620128185428374, 'time_algorithm_update': 0.017889024918539484, 'temp_loss': -0.12631982800479957, 'temp': 0.9395406925887392, 'alpha_loss': 7.370452199065895, 'alpha': 0.4586303199243824, 'critic_loss': 621.8454781694022, 'actor_loss': 91.24783849437334, 'time_step': 0.01834999608714678, 'td_error': 65.63593893574371, 'init_value': -138.54440307617188, 'ave_value': -79.16609478867672} step=9918
2022-04-20 20:06.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:06.58 [info     ] CQL_20220420200340: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.000356667223032455, 'time_algorithm_update': 0.01794109567564133, 'temp_loss': -0.06428938385709161, 'temp': 0.9444231702918895, 'alpha_loss': 7.165953551119531, 'alpha': 0.444457445228309, 'critic_loss': 629.3053626344915, 'actor_loss': 92.13047480443765, 'time_step': 0.01839861674615514, 'td_error': 67.98653562062343, 'init_value': -139.38812255859375, 'ave_value': -80.80889691595425} step=10260
2022-04-20 20:06.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.04 [info     ] CQL_20220420200340: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003605579075060393, 'time_algorithm_update': 0.017856617420040376, 'temp_loss': -0.0759462569159461, 'temp': 0.9482143735676481, 'alpha_loss': 6.850444807643779, 'alpha': 0.430938800263126, 'critic_loss': 633.7657324361522, 'actor_loss': 92.6092472857202, 'time_step': 0.018314160101594982, 'td_error': 64.89931454180105, 'init_value': -139.5859375, 'ave_value': -81.57077519290485} step=10602
2022-04-20 20:07.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.11 [info     ] CQL_20220420200340: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003581576877170139, 'time_algorithm_update': 0.018011289730406645, 'temp_loss': -0.025657772407894245, 'temp': 0.9504630044538375, 'alpha_loss': 6.540263781073498, 'alpha': 0.4180150198483328, 'critic_loss': 640.0516941003632, 'actor_loss': 93.2706431784825, 'time_step': 0.018469898324263722, 'td_error': 65.19846159413142, 'init_value': -137.2770233154297, 'ave_value': -80.72466677639197} step=10944
2022-04-20 20:07.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.17 [info     ] CQL_20220420200340: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003571775224473741, 'time_algorithm_update': 0.018135328739010102, 'temp_loss': -0.040981879773718574, 'temp': 0.953057666968184, 'alpha_loss': 6.340125552394934, 'alpha': 0.4054533960352167, 'critic_loss': 643.2691396077474, 'actor_loss': 93.56910745721115, 'time_step': 0.018593262510690076, 'td_error': 66.82161122465381, 'init_value': -139.2652587890625, 'ave_value': -82.71807307980082} step=11286
2022-04-20 20:07.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.24 [info     ] CQL_20220420200340: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003585885142722325, 'time_algorithm_update': 0.0179459700110363, 'temp_loss': 0.01799225289788511, 'temp': 0.9528516224253247, 'alpha_loss': 6.062381586833307, 'alpha': 0.39338898423470947, 'critic_loss': 648.9579059980069, 'actor_loss': 94.20233535766602, 'time_step': 0.01840323174906056, 'td_error': 63.16687292384152, 'init_value': -141.27822875976562, 'ave_value': -83.53733431622655} step=11628
2022-04-20 20:07.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.30 [info     ] CQL_20220420200340: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003553782290185404, 'time_algorithm_update': 0.01804414757511072, 'temp_loss': -0.017576969680730362, 'temp': 0.9528573104861187, 'alpha_loss': 5.843312279522768, 'alpha': 0.38168310404520983, 'critic_loss': 648.2711360105994, 'actor_loss': 94.31717048332705, 'time_step': 0.0184979752490395, 'td_error': 59.66801328037744, 'init_value': -136.27816772460938, 'ave_value': -82.46248560673952} step=11970
2022-04-20 20:07.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.37 [info     ] CQL_20220420200340: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00036514363093682894, 'time_algorithm_update': 0.017972551591215077, 'temp_loss': 0.032418051053775335, 'temp': 0.9525556348220646, 'alpha_loss': 5.331517520703767, 'alpha': 0.3705947777332618, 'critic_loss': 645.7580978661252, 'actor_loss': 94.28124370909573, 'time_step': 0.018436821580630296, 'td_error': 61.44481252147174, 'init_value': -139.07443237304688, 'ave_value': -83.57100336144636} step=12312
2022-04-20 20:07.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.44 [info     ] CQL_20220420200340: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003584623336791992, 'time_algorithm_update': 0.018010684621264362, 'temp_loss': 0.07895587581857952, 'temp': 0.9491175862780789, 'alpha_loss': 5.162187953441464, 'alpha': 0.36009297759560815, 'critic_loss': 640.3491207368193, 'actor_loss': 94.26871332090501, 'time_step': 0.018468661615025927, 'td_error': 61.1275036509306, 'init_value': -138.94639587402344, 'ave_value': -84.47258333852565} step=12654
2022-04-20 20:07.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.50 [info     ] CQL_20220420200340: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003566985938981263, 'time_algorithm_update': 0.01794992622576262, 'temp_loss': 0.06442640099282328, 'temp': 0.9453131915533055, 'alpha_loss': 5.012896520352503, 'alpha': 0.34964794774501645, 'critic_loss': 633.7817312318679, 'actor_loss': 94.20210252728378, 'time_step': 0.0184082545732197, 'td_error': 59.64178267841306, 'init_value': -135.56192016601562, 'ave_value': -82.90810387704823} step=12996
2022-04-20 20:07.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:07.57 [info     ] CQL_20220420200340: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003535956667180647, 'time_algorithm_update': 0.01789816569166574, 'temp_loss': 0.059090124661026644, 'temp': 0.9413777124463466, 'alpha_loss': 4.888081204821492, 'alpha': 0.3394079267630103, 'critic_loss': 626.4836713110494, 'actor_loss': 94.14967020492108, 'time_step': 0.01834886046180948, 'td_error': 55.452073315278234, 'init_value': -134.7066650390625, 'ave_value': -83.47946695419314} step=13338
2022-04-20 20:07.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.03 [info     ] CQL_20220420200340: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003508287563658597, 'time_algorithm_update': 0.017912613718133224, 'temp_loss': 0.0693717286023393, 'temp': 0.9389067907430972, 'alpha_loss': 4.670893768120927, 'alpha': 0.3294523489056972, 'critic_loss': 620.8790127938254, 'actor_loss': 94.0209173905222, 'time_step': 0.01836492513355456, 'td_error': 57.35714495965237, 'init_value': -136.51705932617188, 'ave_value': -85.37097215338267} step=13680
2022-04-20 20:08.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.10 [info     ] CQL_20220420200340: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003524718925966854, 'time_algorithm_update': 0.01806966951716016, 'temp_loss': 0.07632896411967904, 'temp': 0.9334961995046739, 'alpha_loss': 4.501874821925024, 'alpha': 0.319735758834415, 'critic_loss': 619.7188924153646, 'actor_loss': 93.97926410875823, 'time_step': 0.01852420686978346, 'td_error': 57.251564316931976, 'init_value': -134.9356231689453, 'ave_value': -83.97295333137114} step=14022
2022-04-20 20:08.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.16 [info     ] CQL_20220420200340: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003504007183320341, 'time_algorithm_update': 0.01806595590379503, 'temp_loss': 0.08709584347555163, 'temp': 0.9302458545269324, 'alpha_loss': 4.314644815628989, 'alpha': 0.3103597426449346, 'critic_loss': 613.529905084978, 'actor_loss': 93.80461075989126, 'time_step': 0.01851346130259553, 'td_error': 50.905241691186305, 'init_value': -133.55979919433594, 'ave_value': -83.3920183386365} step=14364
2022-04-20 20:08.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.23 [info     ] CQL_20220420200340: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003545765291180527, 'time_algorithm_update': 0.017779951207121912, 'temp_loss': 0.07375452786204759, 'temp': 0.9253421397934183, 'alpha_loss': 4.152385437000564, 'alpha': 0.3012394086833586, 'critic_loss': 608.1357007835344, 'actor_loss': 93.77961220099913, 'time_step': 0.01823175162599798, 'td_error': 51.60250938170201, 'init_value': -129.04312133789062, 'ave_value': -82.32841809623145} step=14706
2022-04-20 20:08.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.29 [info     ] CQL_20220420200340: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003471025946544625, 'time_algorithm_update': 0.01790569888220893, 'temp_loss': 0.13728198205387732, 'temp': 0.9184372256025236, 'alpha_loss': 4.006379120531138, 'alpha': 0.29239088628027177, 'critic_loss': 605.4286918417055, 'actor_loss': 93.66709150347793, 'time_step': 0.01835466198056762, 'td_error': 52.81633865646397, 'init_value': -131.39053344726562, 'ave_value': -83.54168192038934} step=15048
2022-04-20 20:08.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.36 [info     ] CQL_20220420200340: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00035318157129120406, 'time_algorithm_update': 0.018100537054719982, 'temp_loss': 0.13137850487789912, 'temp': 0.9121069817515145, 'alpha_loss': 3.8371095043873926, 'alpha': 0.283789084058756, 'critic_loss': 601.6293388500549, 'actor_loss': 93.46928602073625, 'time_step': 0.018552594017564206, 'td_error': 52.75751518521978, 'init_value': -130.72817993164062, 'ave_value': -82.84816402616734} step=15390
2022-04-20 20:08.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.42 [info     ] CQL_20220420200340: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035185562936883226, 'time_algorithm_update': 0.01806047785351848, 'temp_loss': 0.09463072149899968, 'temp': 0.9053630630175272, 'alpha_loss': 3.762226925258748, 'alpha': 0.2753937016976507, 'critic_loss': 595.6451768484729, 'actor_loss': 93.35002843399494, 'time_step': 0.018513298871224385, 'td_error': 47.86060452195568, 'init_value': -129.01019287109375, 'ave_value': -83.79460733012439} step=15732
2022-04-20 20:08.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.49 [info     ] CQL_20220420200340: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003508587329708345, 'time_algorithm_update': 0.018060180875990126, 'temp_loss': 0.14509573787973637, 'temp': 0.9000491768296002, 'alpha_loss': 3.4726020481154234, 'alpha': 0.2673617078546892, 'critic_loss': 591.8983930621231, 'actor_loss': 93.19069464164868, 'time_step': 0.018509626388549805, 'td_error': 48.3236426351464, 'init_value': -127.5034408569336, 'ave_value': -82.88289264355626} step=16074
2022-04-20 20:08.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:08.56 [info     ] CQL_20220420200340: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003600232085289314, 'time_algorithm_update': 0.018207689474897776, 'temp_loss': 0.16009649729127423, 'temp': 0.8907108029775452, 'alpha_loss': 3.3870799555415996, 'alpha': 0.2595860405623564, 'critic_loss': 588.4821089359752, 'actor_loss': 93.18699434068468, 'time_step': 0.018667704877797623, 'td_error': 48.34114885635076, 'init_value': -123.575439453125, 'ave_value': -80.68935556014983} step=16416
2022-04-20 20:08.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:09.02 [info     ] CQL_20220420200340: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003537678579140825, 'time_algorithm_update': 0.018318422356544184, 'temp_loss': 0.1437933089603719, 'temp': 0.883558042739567, 'alpha_loss': 3.2155329595532334, 'alpha': 0.2520661823320807, 'critic_loss': 584.3469571119164, 'actor_loss': 92.92465337117513, 'time_step': 0.018770360110098857, 'td_error': 48.47677300409014, 'init_value': -126.07365417480469, 'ave_value': -81.68701934750419} step=16758
2022-04-20 20:09.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:09.09 [info     ] CQL_20220420200340: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00035624267065037063, 'time_algorithm_update': 0.01804276865128188, 'temp_loss': 0.14216378602956298, 'temp': 0.8765476239703552, 'alpha_loss': 3.06557099582159, 'alpha': 0.24479178439455423, 'critic_loss': 578.7746530276293, 'actor_loss': 92.68056508114464, 'time_step': 0.018499973224617584, 'td_error': 49.230753361507645, 'init_value': -126.4806137084961, 'ave_value': -82.55221943962212} step=17100
2022-04-20 20:09.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420200340/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:09.09 [info     ] FQE_20220420200909: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00013894632638218892, 'time_algorithm_update': 0.001945802964359881, 'loss': 0.008075448358992496, 'time_step': 0.0021517535290086127, 'init_value': -0.32970577478408813, 'ave_value': -0.3006371559390614, 'soft_opc': nan} step=166




2022-04-20 20:09.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.10 [info     ] FQE_20220420200909: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014528165380638767, 'time_algorithm_update': 0.0020591810525181784, 'loss': 0.006291109503314169, 'time_step': 0.0022729620876082456, 'init_value': -0.5090751647949219, 'ave_value': -0.4315710966282331, 'soft_opc': nan} step=332




2022-04-20 20:09.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.10 [info     ] FQE_20220420200909: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014579152486410486, 'time_algorithm_update': 0.002094339175396655, 'loss': 0.0057637054495603204, 'time_step': 0.0023131585982908687, 'init_value': -0.5492318868637085, 'ave_value': -0.4528390649806809, 'soft_opc': nan} step=498




2022-04-20 20:09.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.11 [info     ] FQE_20220420200909: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001439976404948407, 'time_algorithm_update': 0.0020675673542252504, 'loss': 0.0058432473795466035, 'time_step': 0.002277632793748235, 'init_value': -0.6031003594398499, 'ave_value': -0.4867077587910735, 'soft_opc': nan} step=664




2022-04-20 20:09.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.11 [info     ] FQE_20220420200909: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014788271432899567, 'time_algorithm_update': 0.002046879515590438, 'loss': 0.005600707253429994, 'time_step': 0.002266303602471409, 'init_value': -0.6511670351028442, 'ave_value': -0.5162166872204424, 'soft_opc': nan} step=830




2022-04-20 20:09.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.12 [info     ] FQE_20220420200909: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014190070600394742, 'time_algorithm_update': 0.0020039986415081716, 'loss': 0.005204076814207147, 'time_step': 0.0022133301539593434, 'init_value': -0.6776120066642761, 'ave_value': -0.5339343378471362, 'soft_opc': nan} step=996




2022-04-20 20:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.12 [info     ] FQE_20220420200909: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014145115771925593, 'time_algorithm_update': 0.002027790230440806, 'loss': 0.005021159559578063, 'time_step': 0.00223700971488493, 'init_value': -0.7441563606262207, 'ave_value': -0.5883996387591233, 'soft_opc': nan} step=1162




2022-04-20 20:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.12 [info     ] FQE_20220420200909: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.000144836414291198, 'time_algorithm_update': 0.002005805452185941, 'loss': 0.004688675467665476, 'time_step': 0.002219724367900067, 'init_value': -0.7474381327629089, 'ave_value': -0.5864770692781553, 'soft_opc': nan} step=1328




2022-04-20 20:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.13 [info     ] FQE_20220420200909: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014404647321586148, 'time_algorithm_update': 0.0020705978554415414, 'loss': 0.00433617563503065, 'time_step': 0.0022851171263729235, 'init_value': -0.7557917237281799, 'ave_value': -0.5898105708429137, 'soft_opc': nan} step=1494




2022-04-20 20:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.13 [info     ] FQE_20220420200909: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.000146538378244423, 'time_algorithm_update': 0.0021797073892800205, 'loss': 0.0042406379586052015, 'time_step': 0.002395122884267784, 'init_value': -0.8037446141242981, 'ave_value': -0.6204890729372834, 'soft_opc': nan} step=1660




2022-04-20 20:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.14 [info     ] FQE_20220420200909: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001446654997676252, 'time_algorithm_update': 0.0020697145576936654, 'loss': 0.003865924965399486, 'time_step': 0.0022803832249469066, 'init_value': -0.8710460066795349, 'ave_value': -0.6833959447203187, 'soft_opc': nan} step=1826




2022-04-20 20:09.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.14 [info     ] FQE_20220420200909: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001502195036554911, 'time_algorithm_update': 0.0022424617445612528, 'loss': 0.003613653426557914, 'time_step': 0.0024620553097092963, 'init_value': -0.8808143138885498, 'ave_value': -0.6896771911128953, 'soft_opc': nan} step=1992




2022-04-20 20:09.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.15 [info     ] FQE_20220420200909: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014134631099471128, 'time_algorithm_update': 0.0020774143287934452, 'loss': 0.0036681470067060767, 'time_step': 0.002282982849213014, 'init_value': -0.9662672281265259, 'ave_value': -0.7540984709721965, 'soft_opc': nan} step=2158




2022-04-20 20:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.15 [info     ] FQE_20220420200909: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001532327697937747, 'time_algorithm_update': 0.0021883278007966927, 'loss': 0.0034560846944664977, 'time_step': 0.002411703029310847, 'init_value': -1.0141475200653076, 'ave_value': -0.7898730583522502, 'soft_opc': nan} step=2324




2022-04-20 20:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.15 [info     ] FQE_20220420200909: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014738720583628458, 'time_algorithm_update': 0.0020726359034159095, 'loss': 0.003323947176959994, 'time_step': 0.002288193587797234, 'init_value': -1.0469788312911987, 'ave_value': -0.8149596663611429, 'soft_opc': nan} step=2490




2022-04-20 20:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.16 [info     ] FQE_20220420200909: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014481917921319064, 'time_algorithm_update': 0.0019894163292574593, 'loss': 0.003465162677808205, 'time_step': 0.0022017596715904145, 'init_value': -1.129822850227356, 'ave_value': -0.8892093806049308, 'soft_opc': nan} step=2656




2022-04-20 20:09.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.16 [info     ] FQE_20220420200909: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001475724829248635, 'time_algorithm_update': 0.0020835686878985667, 'loss': 0.0035075128459966325, 'time_step': 0.002301583807152438, 'init_value': -1.1674655675888062, 'ave_value': -0.9405443309932142, 'soft_opc': nan} step=2822




2022-04-20 20:09.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.17 [info     ] FQE_20220420200909: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014989921845585467, 'time_algorithm_update': 0.0021011886826480725, 'loss': 0.0034755983008408404, 'time_step': 0.002322591930986887, 'init_value': -1.2201803922653198, 'ave_value': -0.9918236432177526, 'soft_opc': nan} step=2988




2022-04-20 20:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.17 [info     ] FQE_20220420200909: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.000150597239115152, 'time_algorithm_update': 0.002161221331860646, 'loss': 0.0038847711795649254, 'time_step': 0.002382228173405291, 'init_value': -1.2728869915008545, 'ave_value': -1.037397025357764, 'soft_opc': nan} step=3154




2022-04-20 20:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.18 [info     ] FQE_20220420200909: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014488381075571818, 'time_algorithm_update': 0.002076660294130624, 'loss': 0.003819546348440562, 'time_step': 0.0022901770580245786, 'init_value': -1.3377463817596436, 'ave_value': -1.1004189112164953, 'soft_opc': nan} step=3320




2022-04-20 20:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.18 [info     ] FQE_20220420200909: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014055493366287416, 'time_algorithm_update': 0.0020052611109722092, 'loss': 0.003916197365601491, 'time_step': 0.0022146184760403922, 'init_value': -1.3995091915130615, 'ave_value': -1.1632205695182354, 'soft_opc': nan} step=3486




2022-04-20 20:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.18 [info     ] FQE_20220420200909: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001473010304462479, 'time_algorithm_update': 0.002105250416031803, 'loss': 0.0040172052682596205, 'time_step': 0.002319672021521143, 'init_value': -1.4661250114440918, 'ave_value': -1.2376759508607893, 'soft_opc': nan} step=3652




2022-04-20 20:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.19 [info     ] FQE_20220420200909: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015035594802304922, 'time_algorithm_update': 0.0021496336144137093, 'loss': 0.004373317111019853, 'time_step': 0.0023733765245920204, 'init_value': -1.538933277130127, 'ave_value': -1.3003615196223732, 'soft_opc': nan} step=3818




2022-04-20 20:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.19 [info     ] FQE_20220420200909: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001460773399077266, 'time_algorithm_update': 0.002094113683126059, 'loss': 0.004537985091383782, 'time_step': 0.002307735293744558, 'init_value': -1.567789077758789, 'ave_value': -1.3306777968499306, 'soft_opc': nan} step=3984




2022-04-20 20:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.20 [info     ] FQE_20220420200909: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014359548867466938, 'time_algorithm_update': 0.0021062759031732397, 'loss': 0.004951220859930548, 'time_step': 0.0023187614348997554, 'init_value': -1.641553282737732, 'ave_value': -1.39067248094592, 'soft_opc': nan} step=4150




2022-04-20 20:09.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.20 [info     ] FQE_20220420200909: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014391433761780522, 'time_algorithm_update': 0.0020091806549623788, 'loss': 0.005254720860856574, 'time_step': 0.0022217020931014097, 'init_value': -1.746673345565796, 'ave_value': -1.505565625590247, 'soft_opc': nan} step=4316




2022-04-20 20:09.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.21 [info     ] FQE_20220420200909: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014375922191573913, 'time_algorithm_update': 0.0020714452467769026, 'loss': 0.005430004672366138, 'time_step': 0.0022860521293548218, 'init_value': -1.7751187086105347, 'ave_value': -1.5126201474988783, 'soft_opc': nan} step=4482




2022-04-20 20:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.21 [info     ] FQE_20220420200909: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.000144766037722668, 'time_algorithm_update': 0.002119259661938771, 'loss': 0.0059669337842714444, 'time_step': 0.002331641783197242, 'init_value': -1.8717563152313232, 'ave_value': -1.6155540615857185, 'soft_opc': nan} step=4648




2022-04-20 20:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.21 [info     ] FQE_20220420200909: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014698936278561512, 'time_algorithm_update': 0.0020700132990457924, 'loss': 0.006189871479473806, 'time_step': 0.002285466136702572, 'init_value': -1.8919379711151123, 'ave_value': -1.6253362656579362, 'soft_opc': nan} step=4814




2022-04-20 20:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.22 [info     ] FQE_20220420200909: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014535346663141824, 'time_algorithm_update': 0.0020031512501728103, 'loss': 0.006059287072100441, 'time_step': 0.002215451504810747, 'init_value': -1.903795599937439, 'ave_value': -1.6361809219728718, 'soft_opc': nan} step=4980




2022-04-20 20:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.22 [info     ] FQE_20220420200909: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014655992209193218, 'time_algorithm_update': 0.002079505518258336, 'loss': 0.006396391146384314, 'time_step': 0.002296868577060929, 'init_value': -1.9877591133117676, 'ave_value': -1.7149959104219537, 'soft_opc': nan} step=5146




2022-04-20 20:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.23 [info     ] FQE_20220420200909: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014155313193079937, 'time_algorithm_update': 0.0020109917744096503, 'loss': 0.006828286004791328, 'time_step': 0.0022216834217669017, 'init_value': -2.0483880043029785, 'ave_value': -1.757226999533606, 'soft_opc': nan} step=5312




2022-04-20 20:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.23 [info     ] FQE_20220420200909: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014526872749788216, 'time_algorithm_update': 0.0020206592169152685, 'loss': 0.007263717374831708, 'time_step': 0.002233592860669975, 'init_value': -2.0469863414764404, 'ave_value': -1.756690992690153, 'soft_opc': nan} step=5478




2022-04-20 20:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.24 [info     ] FQE_20220420200909: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014600839959569723, 'time_algorithm_update': 0.0019932798592441053, 'loss': 0.007023483534422086, 'time_step': 0.0022035391933946723, 'init_value': -2.1002414226531982, 'ave_value': -1.813636057696364, 'soft_opc': nan} step=5644




2022-04-20 20:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.24 [info     ] FQE_20220420200909: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001520751470542816, 'time_algorithm_update': 0.002165511430027973, 'loss': 0.007692660597792591, 'time_step': 0.002388319337224386, 'init_value': -2.231147050857544, 'ave_value': -1.9436085418403686, 'soft_opc': nan} step=5810




2022-04-20 20:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.24 [info     ] FQE_20220420200909: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015046366726059512, 'time_algorithm_update': 0.0021087376468152887, 'loss': 0.008055504372887627, 'time_step': 0.002327686332794557, 'init_value': -2.2386927604675293, 'ave_value': -1.9388704985984273, 'soft_opc': nan} step=5976




2022-04-20 20:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.25 [info     ] FQE_20220420200909: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015235378081540027, 'time_algorithm_update': 0.002169910683689347, 'loss': 0.008531471848325154, 'time_step': 0.0023933979402105494, 'init_value': -2.3586344718933105, 'ave_value': -2.066089889514554, 'soft_opc': nan} step=6142




2022-04-20 20:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.25 [info     ] FQE_20220420200909: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014212045324854105, 'time_algorithm_update': 0.002032914793634989, 'loss': 0.008923619475737438, 'time_step': 0.0022435202656022036, 'init_value': -2.3831162452697754, 'ave_value': -2.0900445616862795, 'soft_opc': nan} step=6308




2022-04-20 20:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.26 [info     ] FQE_20220420200909: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014881915356739457, 'time_algorithm_update': 0.0021932527243372904, 'loss': 0.009258478277514363, 'time_step': 0.0024123550897621245, 'init_value': -2.3885507583618164, 'ave_value': -2.1071663505590714, 'soft_opc': nan} step=6474




2022-04-20 20:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.26 [info     ] FQE_20220420200909: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001442906368209655, 'time_algorithm_update': 0.0020432472229003906, 'loss': 0.009521797852007201, 'time_step': 0.002258530582290098, 'init_value': -2.443561553955078, 'ave_value': -2.1636776056174223, 'soft_opc': nan} step=6640




2022-04-20 20:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.27 [info     ] FQE_20220420200909: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015003279031041157, 'time_algorithm_update': 0.002135375896132136, 'loss': 0.010247097952369914, 'time_step': 0.002360912690679711, 'init_value': -2.4736506938934326, 'ave_value': -2.1628109529614448, 'soft_opc': nan} step=6806




2022-04-20 20:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.27 [info     ] FQE_20220420200909: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014197970011148108, 'time_algorithm_update': 0.001965301582612187, 'loss': 0.010342333492113233, 'time_step': 0.002176386764250606, 'init_value': -2.490067720413208, 'ave_value': -2.1828191188071764, 'soft_opc': nan} step=6972




2022-04-20 20:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.27 [info     ] FQE_20220420200909: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014734268188476562, 'time_algorithm_update': 0.0020628377615687357, 'loss': 0.010817617740403547, 'time_step': 0.0022796062101800756, 'init_value': -2.576254367828369, 'ave_value': -2.2723394640058547, 'soft_opc': nan} step=7138




2022-04-20 20:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.28 [info     ] FQE_20220420200909: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014293768319738917, 'time_algorithm_update': 0.00204226051468447, 'loss': 0.01126159874354882, 'time_step': 0.0022531460566693044, 'init_value': -2.6159751415252686, 'ave_value': -2.3075385685036847, 'soft_opc': nan} step=7304




2022-04-20 20:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.28 [info     ] FQE_20220420200909: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014413982988840127, 'time_algorithm_update': 0.002031909414084561, 'loss': 0.011530841744213129, 'time_step': 0.0022462261728493564, 'init_value': -2.6029305458068848, 'ave_value': -2.279407761044599, 'soft_opc': nan} step=7470




2022-04-20 20:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.29 [info     ] FQE_20220420200909: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014658864722194443, 'time_algorithm_update': 0.0020878042083188712, 'loss': 0.011474432206659365, 'time_step': 0.0023044275950236492, 'init_value': -2.7459425926208496, 'ave_value': -2.421971045233108, 'soft_opc': nan} step=7636




2022-04-20 20:09.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.29 [info     ] FQE_20220420200909: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015298429741916885, 'time_algorithm_update': 0.0020809805536844642, 'loss': 0.012398832572304296, 'time_step': 0.0023032498646931476, 'init_value': -2.664376974105835, 'ave_value': -2.3421543136306173, 'soft_opc': nan} step=7802




2022-04-20 20:09.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.30 [info     ] FQE_20220420200909: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014854913734527956, 'time_algorithm_update': 0.0020826782088681876, 'loss': 0.012888073393126882, 'time_step': 0.0022995242153305605, 'init_value': -2.7058496475219727, 'ave_value': -2.3967951056589416, 'soft_opc': nan} step=7968




2022-04-20 20:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.30 [info     ] FQE_20220420200909: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001445448542215738, 'time_algorithm_update': 0.0020539616963949547, 'loss': 0.01226142500541229, 'time_step': 0.0022665477660765133, 'init_value': -2.7978150844573975, 'ave_value': -2.4554142783152626, 'soft_opc': nan} step=8134




2022-04-20 20:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:09.30 [info     ] FQE_20220420200909: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014220662863857774, 'time_algorithm_update': 0.001954982079655291, 'loss': 0.012922298208759895, 'time_step': 0.0021686036902737907, 'init_value': -2.8216891288757324, 'ave_value': -2.4748088410375892, 'soft_opc': nan} step=8300




2022-04-20 20:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200909/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 20:09.31 [debug    ] RoundIterator is selected.
2022-04-20 20:09.31 [info     ] Directory is created at d3rlpy_logs/FQE_20220420200931
2022-04-20 20:09.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:09.31 [debug    ] Building models...
2022-04-20 20:09.31 [debug    ] Models have been built.
2022-04-20 20:09.31 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420200931/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:09.32 [info     ] FQE_20220420200931: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00014929569942850462, 'time_algorithm_update': 0.002075917284253617, 'loss': 0.023722515588390156, 'time_step': 0.0022958325668120047, 'init_value': -0.978035032749176, 'ave_value': -0.9651316195151061, 'soft_opc': nan} step=355




2022-04-20 20:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.32 [info     ] FQE_20220420200931: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00014844410856005173, 'time_algorithm_update': 0.0019804712752221334, 'loss': 0.02331600116539589, 'time_step': 0.0021976833612146513, 'init_value': -2.163482666015625, 'ave_value': -2.1563526170747775, 'soft_opc': nan} step=710




2022-04-20 20:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.33 [info     ] FQE_20220420200931: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00014993036297005668, 'time_algorithm_update': 0.002035643349231129, 'loss': 0.02468283760400725, 'time_step': 0.002256022708516725, 'init_value': -2.8169102668762207, 'ave_value': -2.7806732315968055, 'soft_opc': nan} step=1065




2022-04-20 20:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.34 [info     ] FQE_20220420200931: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00015371618136553696, 'time_algorithm_update': 0.002085937580592196, 'loss': 0.02812298514526075, 'time_step': 0.0023097723302706865, 'init_value': -3.8976378440856934, 'ave_value': -3.8737592111529837, 'soft_opc': nan} step=1420




2022-04-20 20:09.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.35 [info     ] FQE_20220420200931: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00015340522981025802, 'time_algorithm_update': 0.002074643927560726, 'loss': 0.0323291843258579, 'time_step': 0.002299326238497882, 'init_value': -4.5162529945373535, 'ave_value': -4.4997203640342525, 'soft_opc': nan} step=1775




2022-04-20 20:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.36 [info     ] FQE_20220420200931: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00015107544375137543, 'time_algorithm_update': 0.0020484017654204033, 'loss': 0.03955708023580447, 'time_step': 0.0022726152984189315, 'init_value': -5.438466548919678, 'ave_value': -5.468099480706292, 'soft_opc': nan} step=2130




2022-04-20 20:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.37 [info     ] FQE_20220420200931: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.0001493178622823366, 'time_algorithm_update': 0.002009822281313614, 'loss': 0.04491312095082142, 'time_step': 0.0022293332596899757, 'init_value': -5.958823204040527, 'ave_value': -6.0396818595503285, 'soft_opc': nan} step=2485




2022-04-20 20:09.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.38 [info     ] FQE_20220420200931: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001827454902756382, 'time_algorithm_update': 0.0035171999058253326, 'loss': 0.05407902280681989, 'time_step': 0.0037709215996970594, 'init_value': -6.798771381378174, 'ave_value': -6.958763368678983, 'soft_opc': nan} step=2840




2022-04-20 20:09.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.39 [info     ] FQE_20220420200931: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001517792822609485, 'time_algorithm_update': 0.00209610361448476, 'loss': 0.06232235751731295, 'time_step': 0.0023187912685770384, 'init_value': -7.337265968322754, 'ave_value': -7.5304716643219285, 'soft_opc': nan} step=3195




2022-04-20 20:09.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.40 [info     ] FQE_20220420200931: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00015405534018932934, 'time_algorithm_update': 0.0020773357068988637, 'loss': 0.07261968836135847, 'time_step': 0.0023040952816815443, 'init_value': -7.771103858947754, 'ave_value': -8.055943089961392, 'soft_opc': nan} step=3550




2022-04-20 20:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.41 [info     ] FQE_20220420200931: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00015173562815491582, 'time_algorithm_update': 0.002041053771972656, 'loss': 0.08079183797653712, 'time_step': 0.0022658193615120903, 'init_value': -8.498649597167969, 'ave_value': -8.881854703681043, 'soft_opc': nan} step=3905




2022-04-20 20:09.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.42 [info     ] FQE_20220420200931: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00015503923657914282, 'time_algorithm_update': 0.0020720434860444403, 'loss': 0.08972274749211862, 'time_step': 0.0022975713434353682, 'init_value': -8.848363876342773, 'ave_value': -9.314242629808502, 'soft_opc': nan} step=4260




2022-04-20 20:09.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.43 [info     ] FQE_20220420200931: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00014998812071034606, 'time_algorithm_update': 0.0020771892977432464, 'loss': 0.09874017649710598, 'time_step': 0.0022971912169120682, 'init_value': -9.316940307617188, 'ave_value': -9.932372027719957, 'soft_opc': nan} step=4615




2022-04-20 20:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.43 [info     ] FQE_20220420200931: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00015102238722250496, 'time_algorithm_update': 0.0020780267849774426, 'loss': 0.10859620514994776, 'time_step': 0.0022975552249962176, 'init_value': -9.573687553405762, 'ave_value': -10.310093027805884, 'soft_opc': nan} step=4970




2022-04-20 20:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.44 [info     ] FQE_20220420200931: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00015009826337787467, 'time_algorithm_update': 0.002045689838033327, 'loss': 0.11796953632745524, 'time_step': 0.002265367373614244, 'init_value': -9.814196586608887, 'ave_value': -10.70450531960761, 'soft_opc': nan} step=5325




2022-04-20 20:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.45 [info     ] FQE_20220420200931: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.0001505845029589156, 'time_algorithm_update': 0.0020465065056169537, 'loss': 0.1281275947014211, 'time_step': 0.0022685514369481046, 'init_value': -10.271992683410645, 'ave_value': -11.271189455482798, 'soft_opc': nan} step=5680




2022-04-20 20:09.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.46 [info     ] FQE_20220420200931: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00015499759727800397, 'time_algorithm_update': 0.0021113926256206674, 'loss': 0.14266225560240342, 'time_step': 0.0023387244049931917, 'init_value': -10.457703590393066, 'ave_value': -11.703641412387023, 'soft_opc': nan} step=6035




2022-04-20 20:09.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.47 [info     ] FQE_20220420200931: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00015102910323881768, 'time_algorithm_update': 0.0020391041124370735, 'loss': 0.1470236334057761, 'time_step': 0.002261583570023658, 'init_value': -10.788655281066895, 'ave_value': -12.284530210126782, 'soft_opc': nan} step=6390




2022-04-20 20:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.48 [info     ] FQE_20220420200931: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00015357715982786365, 'time_algorithm_update': 0.00210357988384408, 'loss': 0.15992930277027714, 'time_step': 0.002326833698111521, 'init_value': -11.086952209472656, 'ave_value': -12.844336069412673, 'soft_opc': nan} step=6745




2022-04-20 20:09.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.49 [info     ] FQE_20220420200931: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00015126685021628797, 'time_algorithm_update': 0.0021244519193407514, 'loss': 0.16768422715470824, 'time_step': 0.002347093232920472, 'init_value': -11.289823532104492, 'ave_value': -13.346543907658933, 'soft_opc': nan} step=7100




2022-04-20 20:09.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.50 [info     ] FQE_20220420200931: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00014896392822265624, 'time_algorithm_update': 0.0020729158965634626, 'loss': 0.18488919996774533, 'time_step': 0.0022921226394008584, 'init_value': -11.694877624511719, 'ave_value': -14.157102159643726, 'soft_opc': nan} step=7455




2022-04-20 20:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.51 [info     ] FQE_20220420200931: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.0001533985137939453, 'time_algorithm_update': 0.002061639705174406, 'loss': 0.19032568330286254, 'time_step': 0.0022829801263943524, 'init_value': -11.883306503295898, 'ave_value': -14.680178790877992, 'soft_opc': nan} step=7810




2022-04-20 20:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.51 [info     ] FQE_20220420200931: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00014968321356974857, 'time_algorithm_update': 0.002031130186268981, 'loss': 0.19804113550819982, 'time_step': 0.002248725756792955, 'init_value': -12.080545425415039, 'ave_value': -15.258877263934457, 'soft_opc': nan} step=8165




2022-04-20 20:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.52 [info     ] FQE_20220420200931: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00015223865777673856, 'time_algorithm_update': 0.0020783666154028666, 'loss': 0.20248636402924297, 'time_step': 0.002303030121494347, 'init_value': -12.10857105255127, 'ave_value': -15.687296651474451, 'soft_opc': nan} step=8520




2022-04-20 20:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.53 [info     ] FQE_20220420200931: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00015288406694439094, 'time_algorithm_update': 0.002109127313318387, 'loss': 0.2112596035948102, 'time_step': 0.00233322197282818, 'init_value': -12.555720329284668, 'ave_value': -16.51871588816207, 'soft_opc': nan} step=8875




2022-04-20 20:09.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.54 [info     ] FQE_20220420200931: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.0001494951651129924, 'time_algorithm_update': 0.0020303141902869855, 'loss': 0.22032651186094318, 'time_step': 0.0022497465912724884, 'init_value': -12.702163696289062, 'ave_value': -17.051063076900547, 'soft_opc': nan} step=9230




2022-04-20 20:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.55 [info     ] FQE_20220420200931: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001519115877823091, 'time_algorithm_update': 0.00208865891040211, 'loss': 0.23011605560674633, 'time_step': 0.002307728646506726, 'init_value': -13.002188682556152, 'ave_value': -17.584440127118675, 'soft_opc': nan} step=9585




2022-04-20 20:09.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.56 [info     ] FQE_20220420200931: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00015253684890102333, 'time_algorithm_update': 0.0020117988049144476, 'loss': 0.24752692716117475, 'time_step': 0.0022355355007547726, 'init_value': -13.104696273803711, 'ave_value': -18.0768001955295, 'soft_opc': nan} step=9940




2022-04-20 20:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.57 [info     ] FQE_20220420200931: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00014725201566454391, 'time_algorithm_update': 0.001994222318622428, 'loss': 0.2537098702410577, 'time_step': 0.0022107957114636057, 'init_value': -13.388590812683105, 'ave_value': -18.506739892309085, 'soft_opc': nan} step=10295




2022-04-20 20:09.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.58 [info     ] FQE_20220420200931: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00015066778156119334, 'time_algorithm_update': 0.0020877273989395356, 'loss': 0.26928699927955446, 'time_step': 0.00230971591573366, 'init_value': -13.588685035705566, 'ave_value': -18.848460250103336, 'soft_opc': nan} step=10650




2022-04-20 20:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.58 [info     ] FQE_20220420200931: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00015481425003266672, 'time_algorithm_update': 0.0021510782376141617, 'loss': 0.2783576686774761, 'time_step': 0.002378330096392564, 'init_value': -14.080673217773438, 'ave_value': -19.49291718168762, 'soft_opc': nan} step=11005




2022-04-20 20:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:09.59 [info     ] FQE_20220420200931: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00015250058241293463, 'time_algorithm_update': 0.002108487276963785, 'loss': 0.29642804000054446, 'time_step': 0.002333073548867669, 'init_value': -14.346957206726074, 'ave_value': -19.92641102239739, 'soft_opc': nan} step=11360




2022-04-20 20:09.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.00 [info     ] FQE_20220420200931: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.0001571292608556613, 'time_algorithm_update': 0.0022156742257131655, 'loss': 0.3081400253770637, 'time_step': 0.0024481108490849884, 'init_value': -14.317098617553711, 'ave_value': -20.120699645070648, 'soft_opc': nan} step=11715




2022-04-20 20:10.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.01 [info     ] FQE_20220420200931: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00015418160129600846, 'time_algorithm_update': 0.002106426131557411, 'loss': 0.3200107719276992, 'time_step': 0.00233173303201165, 'init_value': -14.15312385559082, 'ave_value': -20.07748167542417, 'soft_opc': nan} step=12070




2022-04-20 20:10.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.02 [info     ] FQE_20220420200931: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00015591231869979645, 'time_algorithm_update': 0.002134616610030053, 'loss': 0.3278475462340973, 'time_step': 0.0023628006518726617, 'init_value': -14.806735038757324, 'ave_value': -20.902858953525048, 'soft_opc': nan} step=12425




2022-04-20 20:10.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.03 [info     ] FQE_20220420200931: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001519230050100407, 'time_algorithm_update': 0.0020816460461683677, 'loss': 0.35264263906529253, 'time_step': 0.0023020186894376514, 'init_value': -15.229934692382812, 'ave_value': -21.55121342402474, 'soft_opc': nan} step=12780




2022-04-20 20:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.04 [info     ] FQE_20220420200931: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00015669809260838468, 'time_algorithm_update': 0.002185980031188105, 'loss': 0.36464205037435177, 'time_step': 0.00241818025078572, 'init_value': -15.167759895324707, 'ave_value': -21.65840678828717, 'soft_opc': nan} step=13135




2022-04-20 20:10.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.05 [info     ] FQE_20220420200931: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00015514132002709618, 'time_algorithm_update': 0.00208865891040211, 'loss': 0.36832740954844884, 'time_step': 0.0023144601096569653, 'init_value': -15.41707706451416, 'ave_value': -21.914637841957415, 'soft_opc': nan} step=13490




2022-04-20 20:10.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.06 [info     ] FQE_20220420200931: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00015567927293374506, 'time_algorithm_update': 0.002144434754277619, 'loss': 0.3890483728596862, 'time_step': 0.0023707826372603297, 'init_value': -16.113845825195312, 'ave_value': -22.66988835997563, 'soft_opc': nan} step=13845




2022-04-20 20:10.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.07 [info     ] FQE_20220420200931: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.0001544213630783726, 'time_algorithm_update': 0.002103915013058085, 'loss': 0.40769614281368927, 'time_step': 0.002331920408866775, 'init_value': -16.30757713317871, 'ave_value': -22.914279489934522, 'soft_opc': nan} step=14200




2022-04-20 20:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.07 [info     ] FQE_20220420200931: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.0001556638260962258, 'time_algorithm_update': 0.002138418546864684, 'loss': 0.42638802402456044, 'time_step': 0.002365623393528898, 'init_value': -16.745256423950195, 'ave_value': -23.444307100266563, 'soft_opc': nan} step=14555




2022-04-20 20:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.08 [info     ] FQE_20220420200931: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00015310435227944817, 'time_algorithm_update': 0.0020757097593495544, 'loss': 0.42808330178680554, 'time_step': 0.002300346401375784, 'init_value': -16.555866241455078, 'ave_value': -23.423593677425018, 'soft_opc': nan} step=14910




2022-04-20 20:10.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.09 [info     ] FQE_20220420200931: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00015102305882413623, 'time_algorithm_update': 0.0020629029878428285, 'loss': 0.4376689507088191, 'time_step': 0.0022834153242514165, 'init_value': -16.525009155273438, 'ave_value': -23.49720931102257, 'soft_opc': nan} step=15265




2022-04-20 20:10.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.10 [info     ] FQE_20220420200931: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00015639654347594355, 'time_algorithm_update': 0.002098972025051923, 'loss': 0.4411504073357078, 'time_step': 0.0023304334828551385, 'init_value': -16.86012840270996, 'ave_value': -23.6450545736875, 'soft_opc': nan} step=15620




2022-04-20 20:10.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.11 [info     ] FQE_20220420200931: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.0001526577371946523, 'time_algorithm_update': 0.0021147922730781663, 'loss': 0.45055111060050174, 'time_step': 0.002339900379449549, 'init_value': -16.8769588470459, 'ave_value': -23.7314205054304, 'soft_opc': nan} step=15975




2022-04-20 20:10.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.12 [info     ] FQE_20220420200931: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00015104320687307437, 'time_algorithm_update': 0.00207290918054715, 'loss': 0.4713408433058312, 'time_step': 0.0022943604160362567, 'init_value': -17.449342727661133, 'ave_value': -24.309852285642883, 'soft_opc': nan} step=16330




2022-04-20 20:10.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.13 [info     ] FQE_20220420200931: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00015545227158237512, 'time_algorithm_update': 0.002082738070420816, 'loss': 0.48066811432511036, 'time_step': 0.0023104271418611768, 'init_value': -17.54696273803711, 'ave_value': -24.458997534416817, 'soft_opc': nan} step=16685




2022-04-20 20:10.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.14 [info     ] FQE_20220420200931: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00015346231594891616, 'time_algorithm_update': 0.0021356763974042007, 'loss': 0.49869553265752087, 'time_step': 0.002362159272314797, 'init_value': -17.40859031677246, 'ave_value': -24.34128309449941, 'soft_opc': nan} step=17040




2022-04-20 20:10.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.15 [info     ] FQE_20220420200931: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00015303047610000826, 'time_algorithm_update': 0.0020826037500945617, 'loss': 0.5019254742636228, 'time_step': 0.0023075835805543713, 'init_value': -17.769351959228516, 'ave_value': -24.596687297563296, 'soft_opc': nan} step=17395




2022-04-20 20:10.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-20 20:10.15 [info     ] FQE_20220420200931: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00015064763351225517, 'time_algorithm_update': 0.0020726391966913787, 'loss': 0.5062536231491347, 'time_step': 0.0022961582936031717, 'init_value': -17.90662956237793, 'ave_value': -24.64696917926636, 'soft_opc': nan} step=17750




2022-04-20 20:10.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420200931/model_17750.pt
search iteration:  34
using hyper params:  [0.0035752516425723705, 0.002285277420148277, 2.11693378959021e-05, 5]
2022-04-20 20:10.15 [debug    ] RoundIterator is selected.
2022-04-20 20:10.15 [info     ] Directory is created at d3rlpy_logs/CQL_20220420201015
2022-04-20 20:10.15 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:10.15 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:10.16 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420201015/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0035752516425723705, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'w

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.22 [info     ] CQL_20220420201015: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00034900785189623024, 'time_algorithm_update': 0.018056439377411068, 'temp_loss': 4.603434836655333, 'temp': 0.9964542772337707, 'alpha_loss': -17.82199894475658, 'alpha': 1.0175691009962071, 'critic_loss': 54.55305958351894, 'actor_loss': 3.009329736995244, 'time_step': 0.018503900159869278, 'td_error': 4.0467109953280245, 'init_value': -6.39160680770874, 'ave_value': -4.979732852623135} step=342
2022-04-20 20:10.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.29 [info     ] CQL_20220420201015: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00035123727474993435, 'time_algorithm_update': 0.018269383419326872, 'temp_loss': 4.416345507080792, 'temp': 0.9894343154472217, 'alpha_loss': -11.584054606699803, 'alpha': 1.0476726746698568, 'critic_loss': 30.519421136867233, 'actor_loss': 7.231743657798098, 'time_step': 0.01872264153776113, 'td_error': 4.424205278817593, 'init_value': -15.872106552124023, 'ave_value': -9.713540607883788} step=684
2022-04-20 20:10.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.35 [info     ] CQL_20220420201015: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003552562312075966, 'time_algorithm_update': 0.018254432761878298, 'temp_loss': 3.9582816697003547, 'temp': 0.9827135871028343, 'alpha_loss': -7.573462044286449, 'alpha': 1.0706520460502447, 'critic_loss': 30.111518564280015, 'actor_loss': 12.766279418566073, 'time_step': 0.018711987991779172, 'td_error': 5.5813636890519085, 'init_value': -23.417194366455078, 'ave_value': -14.237204028829462} step=1026
2022-04-20 20:10.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.42 [info     ] CQL_20220420201015: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00035807333494487564, 'time_algorithm_update': 0.01811663658298247, 'temp_loss': 3.6192217235676725, 'temp': 0.9764058241370128, 'alpha_loss': -5.82137680123424, 'alpha': 1.0913912992031254, 'critic_loss': 36.10833268416555, 'actor_loss': 18.129482818625824, 'time_step': 0.01857781828495494, 'td_error': 7.715446761559003, 'init_value': -31.572391510009766, 'ave_value': -18.971673401408903} step=1368
2022-04-20 20:10.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.49 [info     ] CQL_20220420201015: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035443849730909917, 'time_algorithm_update': 0.01815233383959497, 'temp_loss': 3.33373432898382, 'temp': 0.9703003255247372, 'alpha_loss': -4.451587482502586, 'alpha': 1.110795831819724, 'critic_loss': 44.8044358638295, 'actor_loss': 23.30449402680871, 'time_step': 0.018606724097714786, 'td_error': 10.30142959113893, 'init_value': -37.615272521972656, 'ave_value': -22.381307795010844} step=1710
2022-04-20 20:10.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:10.55 [info     ] CQL_20220420201015: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003539233179817423, 'time_algorithm_update': 0.018018148795903077, 'temp_loss': 3.0862698115800558, 'temp': 0.9643583167017552, 'alpha_loss': -3.1515171027671522, 'alpha': 1.1280371804683529, 'critic_loss': 55.93483530011093, 'actor_loss': 28.15202479892307, 'time_step': 0.018475387528625844, 'td_error': 13.555185572252066, 'init_value': -44.913787841796875, 'ave_value': -26.561376656385157} step=2052
2022-04-20 20:10.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.02 [info     ] CQL_20220420201015: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003563479373329564, 'time_algorithm_update': 0.018278404983163576, 'temp_loss': 2.858999828846134, 'temp': 0.9585645636271315, 'alpha_loss': -1.9208853085204, 'alpha': 1.1417630126601772, 'critic_loss': 67.7231247327481, 'actor_loss': 32.52008652826499, 'time_step': 0.018735577488503262, 'td_error': 17.218179711250688, 'init_value': -52.164512634277344, 'ave_value': -31.002398331265997} step=2394
2022-04-20 20:11.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.08 [info     ] CQL_20220420201015: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00035131883900067957, 'time_algorithm_update': 0.01807758891791628, 'temp_loss': 2.6693363384893765, 'temp': 0.952860287581271, 'alpha_loss': -0.7198260750986462, 'alpha': 1.1502891566321167, 'critic_loss': 80.93929010803936, 'actor_loss': 36.637078770420004, 'time_step': 0.01852957616772568, 'td_error': 21.19069514148008, 'init_value': -58.642051696777344, 'ave_value': -34.599508669789174} step=2736
2022-04-20 20:11.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.15 [info     ] CQL_20220420201015: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00035694049812896904, 'time_algorithm_update': 0.018241195650825725, 'temp_loss': 2.4873775125246995, 'temp': 0.9472459554672241, 'alpha_loss': 0.3668802603412071, 'alpha': 1.151711162419347, 'critic_loss': 94.80120744760971, 'actor_loss': 40.516989880835105, 'time_step': 0.018703908251042952, 'td_error': 23.887361635860536, 'init_value': -63.271690368652344, 'ave_value': -36.96974780552291} step=3078
2022-04-20 20:11.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.22 [info     ] CQL_20220420201015: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00036139237253289476, 'time_algorithm_update': 0.018189706300434313, 'temp_loss': 2.315659777462831, 'temp': 0.9417078299829137, 'alpha_loss': 1.3665996515382712, 'alpha': 1.143725219525789, 'critic_loss': 109.35844593159636, 'actor_loss': 44.02176714222333, 'time_step': 0.018650402799684402, 'td_error': 27.49290729114452, 'init_value': -69.05729675292969, 'ave_value': -40.02291805157656} step=3420
2022-04-20 20:11.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.28 [info     ] CQL_20220420201015: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00035018739644546957, 'time_algorithm_update': 0.01815089914533827, 'temp_loss': 2.1460451396585207, 'temp': 0.9362871259973761, 'alpha_loss': 2.226278171362628, 'alpha': 1.1244578093116047, 'critic_loss': 123.9395509240223, 'actor_loss': 47.31016219289679, 'time_step': 0.01859786635950992, 'td_error': 30.393189079678578, 'init_value': -73.87301635742188, 'ave_value': -43.22873569932875} step=3762
2022-04-20 20:11.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.35 [info     ] CQL_20220420201015: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003478568896912692, 'time_algorithm_update': 0.01779584787045306, 'temp_loss': 1.9760635418501513, 'temp': 0.9309516641480183, 'alpha_loss': 2.9209223703334204, 'alpha': 1.095447185792421, 'critic_loss': 138.5541744120637, 'actor_loss': 50.273553190175555, 'time_step': 0.01824253135257297, 'td_error': 33.32216916013504, 'init_value': -77.9466323852539, 'ave_value': -46.254341662944974} step=4104
2022-04-20 20:11.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.41 [info     ] CQL_20220420201015: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003315217313710709, 'time_algorithm_update': 0.01692235609244185, 'temp_loss': 1.8503363540298061, 'temp': 0.9256896828001703, 'alpha_loss': 3.501982693260873, 'alpha': 1.0599502921104431, 'critic_loss': 153.08886683056926, 'actor_loss': 53.06909307959484, 'time_step': 0.017348435887119228, 'td_error': 36.0066467081685, 'init_value': -82.31779479980469, 'ave_value': -48.23126302390485} step=4446
2022-04-20 20:11.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.48 [info     ] CQL_20220420201015: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003521330872474358, 'time_algorithm_update': 0.018278508855585466, 'temp_loss': 1.7064720019262436, 'temp': 0.9204879073720229, 'alpha_loss': 3.9233770909016594, 'alpha': 1.0213203315149273, 'critic_loss': 167.03090819420174, 'actor_loss': 55.570572925589936, 'time_step': 0.018730766591969986, 'td_error': 37.9460855762207, 'init_value': -84.60310363769531, 'ave_value': -50.310554505281885} step=4788
2022-04-20 20:11.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:11.54 [info     ] CQL_20220420201015: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00035289644497876976, 'time_algorithm_update': 0.018192574991817364, 'temp_loss': 1.5950497366182985, 'temp': 0.9153704333026507, 'alpha_loss': 4.230416373551241, 'alpha': 0.98241368249843, 'critic_loss': 180.10366455435056, 'actor_loss': 57.892927069413034, 'time_step': 0.018648050681889405, 'td_error': 39.761666151281844, 'init_value': -87.73208618164062, 'ave_value': -51.484333674243175} step=5130
2022-04-20 20:11.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.01 [info     ] CQL_20220420201015: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034927345855891354, 'time_algorithm_update': 0.018087072679173876, 'temp_loss': 1.5014757934020975, 'temp': 0.9102266552852608, 'alpha_loss': 4.47091309438672, 'alpha': 0.9453586153468193, 'critic_loss': 192.77905014662716, 'actor_loss': 60.09217507657949, 'time_step': 0.018536494489301714, 'td_error': 41.976545220307614, 'init_value': -91.15922546386719, 'ave_value': -54.06248121273276} step=5472
2022-04-20 20:12.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.07 [info     ] CQL_20220420201015: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003573037030404074, 'time_algorithm_update': 0.01823367082584671, 'temp_loss': 1.4336700763618737, 'temp': 0.9050904765115146, 'alpha_loss': 4.648743612724438, 'alpha': 0.909724658867072, 'critic_loss': 204.69668739720396, 'actor_loss': 62.1591989394517, 'time_step': 0.018694699159142566, 'td_error': 43.560334845520345, 'init_value': -93.50151824951172, 'ave_value': -55.35904869999032} step=5814
2022-04-20 20:12.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.14 [info     ] CQL_20220420201015: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003547180465787475, 'time_algorithm_update': 0.018214229951825058, 'temp_loss': 1.335305759955568, 'temp': 0.8999051354084796, 'alpha_loss': 4.784264438333567, 'alpha': 0.8762495786125897, 'critic_loss': 217.27724189089056, 'actor_loss': 63.98636030453687, 'time_step': 0.018667660261455336, 'td_error': 45.65339274614118, 'init_value': -97.5262680053711, 'ave_value': -57.862869076318034} step=6156
2022-04-20 20:12.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.21 [info     ] CQL_20220420201015: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00035668325703046473, 'time_algorithm_update': 0.018220369578802097, 'temp_loss': 1.2268598043082053, 'temp': 0.8948754997629869, 'alpha_loss': 4.91410800239496, 'alpha': 0.8441345747451336, 'critic_loss': 228.08410439296077, 'actor_loss': 65.74790452655994, 'time_step': 0.018681561040599443, 'td_error': 46.9388886692661, 'init_value': -100.16173553466797, 'ave_value': -59.4398848284687} step=6498
2022-04-20 20:12.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.27 [info     ] CQL_20220420201015: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003537392755698042, 'time_algorithm_update': 0.018260023747271266, 'temp_loss': 1.2040103132787503, 'temp': 0.889745814584152, 'alpha_loss': 4.968555377240767, 'alpha': 0.8140321856702281, 'critic_loss': 239.4103077670984, 'actor_loss': 67.31748961286935, 'time_step': 0.01871448720407765, 'td_error': 48.393154389112766, 'init_value': -101.46290588378906, 'ave_value': -60.59436725275339} step=6840
2022-04-20 20:12.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.34 [info     ] CQL_20220420201015: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035287831958971525, 'time_algorithm_update': 0.018228832044099506, 'temp_loss': 1.1149769125626101, 'temp': 0.8846581316830819, 'alpha_loss': 5.01341980870007, 'alpha': 0.7853573328918881, 'critic_loss': 250.48647294964707, 'actor_loss': 68.8093062952945, 'time_step': 0.018682127110442225, 'td_error': 47.86379699034983, 'init_value': -100.3797607421875, 'ave_value': -59.9875826125639} step=7182
2022-04-20 20:12.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.40 [info     ] CQL_20220420201015: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003414906953510485, 'time_algorithm_update': 0.01774512327205368, 'temp_loss': 1.024327114311575, 'temp': 0.8796343413012767, 'alpha_loss': 5.032238548262077, 'alpha': 0.758176485000298, 'critic_loss': 261.7406823676929, 'actor_loss': 70.18787468804254, 'time_step': 0.018182882788585642, 'td_error': 48.72607228757451, 'init_value': -103.81895446777344, 'ave_value': -62.447686810046434} step=7524
2022-04-20 20:12.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.47 [info     ] CQL_20220420201015: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00032970431255318266, 'time_algorithm_update': 0.016913460709198178, 'temp_loss': 0.9899571049980253, 'temp': 0.8746717551298309, 'alpha_loss': 4.997362575335809, 'alpha': 0.7322149574756622, 'critic_loss': 271.3399978994626, 'actor_loss': 71.46380067568774, 'time_step': 0.017338976525423818, 'td_error': 49.4311629767675, 'init_value': -107.49165344238281, 'ave_value': -64.59243191946197} step=7866
2022-04-20 20:12.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:12.53 [info     ] CQL_20220420201015: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.00035091938331113223, 'time_algorithm_update': 0.01832693780374806, 'temp_loss': 0.9379008386218757, 'temp': 0.8696124097060042, 'alpha_loss': 5.00468443290532, 'alpha': 0.7074350653684627, 'critic_loss': 280.1649695502387, 'actor_loss': 72.64817067196495, 'time_step': 0.018781782590854935, 'td_error': 49.493648429647486, 'init_value': -108.46533203125, 'ave_value': -65.92695977425254} step=8208
2022-04-20 20:12.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.00 [info     ] CQL_20220420201015: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00035259877032006693, 'time_algorithm_update': 0.018364110885307802, 'temp_loss': 0.8856713035476138, 'temp': 0.8646413101787456, 'alpha_loss': 4.9184128889563485, 'alpha': 0.6838238756907614, 'critic_loss': 288.90354919433594, 'actor_loss': 73.7084150035479, 'time_step': 0.01881967720232512, 'td_error': 50.19992834528275, 'init_value': -109.3814926147461, 'ave_value': -65.68240433578421} step=8550
2022-04-20 20:13.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.07 [info     ] CQL_20220420201015: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00035599728076778656, 'time_algorithm_update': 0.01817518577241061, 'temp_loss': 0.8752002004991498, 'temp': 0.8595345938763423, 'alpha_loss': 4.849327903741981, 'alpha': 0.6613639706408071, 'critic_loss': 296.4579383894714, 'actor_loss': 74.75011958295141, 'time_step': 0.018633494600217942, 'td_error': 50.39051148890161, 'init_value': -110.44891357421875, 'ave_value': -66.27072094519664} step=8892
2022-04-20 20:13.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.13 [info     ] CQL_20220420201015: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035240845373499464, 'time_algorithm_update': 0.01835126695577164, 'temp_loss': 0.7920854781803331, 'temp': 0.8545347208168075, 'alpha_loss': 4.807967660719888, 'alpha': 0.6397027122346979, 'critic_loss': 303.3861759029634, 'actor_loss': 75.66252564547355, 'time_step': 0.01880596325411434, 'td_error': 51.10376306989201, 'init_value': -111.0767822265625, 'ave_value': -67.42698362026128} step=9234
2022-04-20 20:13.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.20 [info     ] CQL_20220420201015: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00035162697061460617, 'time_algorithm_update': 0.01815817091200087, 'temp_loss': 0.745785695898254, 'temp': 0.8496169949832716, 'alpha_loss': 4.708933031349852, 'alpha': 0.6190972467612105, 'critic_loss': 309.0990959747493, 'actor_loss': 76.47877291629189, 'time_step': 0.01861083856103016, 'td_error': 51.37106895917253, 'init_value': -113.71435546875, 'ave_value': -69.19582077867275} step=9576
2022-04-20 20:13.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.26 [info     ] CQL_20220420201015: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003549404311598393, 'time_algorithm_update': 0.018235837506969072, 'temp_loss': 0.7124498300581125, 'temp': 0.8448221286137899, 'alpha_loss': 4.635138826760632, 'alpha': 0.5990696887523808, 'critic_loss': 315.02242997375845, 'actor_loss': 77.28890048690707, 'time_step': 0.018690484309057048, 'td_error': 49.96513653164309, 'init_value': -113.32978820800781, 'ave_value': -67.99826381721878} step=9918
2022-04-20 20:13.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.33 [info     ] CQL_20220420201015: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00035373091000562523, 'time_algorithm_update': 0.018035065360933716, 'temp_loss': 0.6685513854135721, 'temp': 0.8399846165152322, 'alpha_loss': 4.540194885075441, 'alpha': 0.57998998541581, 'critic_loss': 320.79983935439793, 'actor_loss': 77.96935819882398, 'time_step': 0.01848884353860777, 'td_error': 50.66610845835775, 'init_value': -113.81678771972656, 'ave_value': -69.702876014837} step=10260
2022-04-20 20:13.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.40 [info     ] CQL_20220420201015: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00034851916352210687, 'time_algorithm_update': 0.018109426163790517, 'temp_loss': 0.6587204033051405, 'temp': 0.8350680613378335, 'alpha_loss': 4.450219088827658, 'alpha': 0.5616830528131005, 'critic_loss': 326.343901070935, 'actor_loss': 78.71365894072237, 'time_step': 0.018557642635546233, 'td_error': 49.851471145911546, 'init_value': -114.59883880615234, 'ave_value': -69.88285470399771} step=10602
2022-04-20 20:13.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.46 [info     ] CQL_20220420201015: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00035188630310415526, 'time_algorithm_update': 0.01812319030538637, 'temp_loss': 0.621684487255519, 'temp': 0.8302267409904659, 'alpha_loss': 4.355109382791129, 'alpha': 0.5438541827494638, 'critic_loss': 330.37522691871686, 'actor_loss': 79.20129445840044, 'time_step': 0.01857795073972111, 'td_error': 50.89604237571248, 'init_value': -115.54854583740234, 'ave_value': -70.48764863026035} step=10944
2022-04-20 20:13.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.53 [info     ] CQL_20220420201015: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00034990715004547295, 'time_algorithm_update': 0.01797335538250661, 'temp_loss': 0.5895205496412794, 'temp': 0.8254264220159654, 'alpha_loss': 4.271237275753802, 'alpha': 0.5267295474894562, 'critic_loss': 334.66520824766997, 'actor_loss': 79.84468300579584, 'time_step': 0.01842298424034788, 'td_error': 49.85626005305808, 'init_value': -114.4269790649414, 'ave_value': -70.65914352454044} step=11286
2022-04-20 20:13.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:13.59 [info     ] CQL_20220420201015: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003580315071239806, 'time_algorithm_update': 0.018061911850644832, 'temp_loss': 0.5386551614872545, 'temp': 0.820830750186541, 'alpha_loss': 4.154334096183554, 'alpha': 0.5101711640232488, 'critic_loss': 338.70932265610725, 'actor_loss': 80.35332350703011, 'time_step': 0.018519973894308882, 'td_error': 48.907505464616676, 'init_value': -113.93758392333984, 'ave_value': -70.79593854748182} step=11628
2022-04-20 20:13.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.06 [info     ] CQL_20220420201015: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003497001023320427, 'time_algorithm_update': 0.018071646578827795, 'temp_loss': 0.5644307727135761, 'temp': 0.815968973769082, 'alpha_loss': 4.04056421567125, 'alpha': 0.4944832238711809, 'critic_loss': 343.76107913011697, 'actor_loss': 80.93929235001056, 'time_step': 0.01851911781824123, 'td_error': 47.69298379045681, 'init_value': -116.28694152832031, 'ave_value': -70.54135622422572} step=11970
2022-04-20 20:14.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.13 [info     ] CQL_20220420201015: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003507966883698402, 'time_algorithm_update': 0.018123607189334624, 'temp_loss': 0.5339027203249129, 'temp': 0.8110614113988932, 'alpha_loss': 3.943305352626488, 'alpha': 0.47919898562961155, 'critic_loss': 348.24411644294247, 'actor_loss': 81.31529088605915, 'time_step': 0.018577466931259425, 'td_error': 48.59739422618357, 'init_value': -117.6070785522461, 'ave_value': -72.20553972392604} step=12312
2022-04-20 20:14.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.19 [info     ] CQL_20220420201015: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003452921471400568, 'time_algorithm_update': 0.018161215280231676, 'temp_loss': 0.5245633633338918, 'temp': 0.8062965744420102, 'alpha_loss': 3.824765320409808, 'alpha': 0.46435157911122193, 'critic_loss': 351.1505225109078, 'actor_loss': 81.74787592748453, 'time_step': 0.01860712913044712, 'td_error': 48.31050701387491, 'init_value': -119.61055755615234, 'ave_value': -72.69497176474816} step=12654
2022-04-20 20:14.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.26 [info     ] CQL_20220420201015: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035148336176286666, 'time_algorithm_update': 0.018114870054680005, 'temp_loss': 0.469384565274095, 'temp': 0.8016581564967395, 'alpha_loss': 3.7724915547677647, 'alpha': 0.4499642204122934, 'critic_loss': 353.4261449624223, 'actor_loss': 82.15311643812392, 'time_step': 0.0185682529594466, 'td_error': 47.83648311914375, 'init_value': -118.7491455078125, 'ave_value': -73.23505287070935} step=12996
2022-04-20 20:14.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.32 [info     ] CQL_20220420201015: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003520577971698248, 'time_algorithm_update': 0.01804508033551668, 'temp_loss': 0.4715615430334855, 'temp': 0.7970576885848017, 'alpha_loss': 3.6461439620681673, 'alpha': 0.43611332094460203, 'critic_loss': 356.2301814207557, 'actor_loss': 82.37661520082351, 'time_step': 0.0185000066868743, 'td_error': 46.35139478753592, 'init_value': -116.59135437011719, 'ave_value': -72.51754416113367} step=13338
2022-04-20 20:14.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.39 [info     ] CQL_20220420201015: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0003525959817986739, 'time_algorithm_update': 0.018093481398465342, 'temp_loss': 0.45659170576442054, 'temp': 0.7923612744487517, 'alpha_loss': 3.517244810249373, 'alpha': 0.42276622842975525, 'critic_loss': 359.7742657577782, 'actor_loss': 82.77832941423382, 'time_step': 0.018548699847438878, 'td_error': 45.70961537709329, 'init_value': -117.22896575927734, 'ave_value': -73.82020415813} step=13680
2022-04-20 20:14.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.45 [info     ] CQL_20220420201015: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035424957498472336, 'time_algorithm_update': 0.01802439090104131, 'temp_loss': 0.4563657984475208, 'temp': 0.7875649758249695, 'alpha_loss': 3.4064297289179084, 'alpha': 0.40994411889921156, 'critic_loss': 363.27436917845966, 'actor_loss': 83.10021102637576, 'time_step': 0.018479881927981015, 'td_error': 45.194890464592845, 'init_value': -117.08375549316406, 'ave_value': -73.53911109168654} step=14022
2022-04-20 20:14.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.52 [info     ] CQL_20220420201015: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00035719983061851813, 'time_algorithm_update': 0.018062846702441834, 'temp_loss': 0.4332100497395323, 'temp': 0.782994560330932, 'alpha_loss': 3.316759092765942, 'alpha': 0.39745195453975635, 'critic_loss': 365.7622815405416, 'actor_loss': 83.41142168100815, 'time_step': 0.01852099937305116, 'td_error': 45.03700079436046, 'init_value': -119.16032409667969, 'ave_value': -74.48468278809442} step=14364
2022-04-20 20:14.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:14.59 [info     ] CQL_20220420201015: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00035593105338470283, 'time_algorithm_update': 0.018121710297657034, 'temp_loss': 0.437553770066789, 'temp': 0.7782934687988102, 'alpha_loss': 3.195187671491277, 'alpha': 0.3854366993172127, 'critic_loss': 368.92487776906864, 'actor_loss': 83.61782794528537, 'time_step': 0.018578461736266375, 'td_error': 45.88194347001116, 'init_value': -120.12532043457031, 'ave_value': -74.90088965810769} step=14706
2022-04-20 20:14.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.05 [info     ] CQL_20220420201015: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003542530606364646, 'time_algorithm_update': 0.018125294941907736, 'temp_loss': 0.41918204171441453, 'temp': 0.7736901111072965, 'alpha_loss': 3.090563154360007, 'alpha': 0.37381192980802547, 'critic_loss': 371.02080308345325, 'actor_loss': 83.86533563178882, 'time_step': 0.018584800742523014, 'td_error': 45.99522269937096, 'init_value': -118.85709381103516, 'ave_value': -74.65655144941297} step=15048
2022-04-20 20:15.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.12 [info     ] CQL_20220420201015: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.000357511447884186, 'time_algorithm_update': 0.018231521572983057, 'temp_loss': 0.4117269783242666, 'temp': 0.7690116229810213, 'alpha_loss': 2.9944812324312, 'alpha': 0.3625457675484886, 'critic_loss': 372.48512839155586, 'actor_loss': 84.07706074407923, 'time_step': 0.01869267817826299, 'td_error': 43.58224270359499, 'init_value': -115.69636535644531, 'ave_value': -73.84986363916919} step=15390
2022-04-20 20:15.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.19 [info     ] CQL_20220420201015: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00035581811826828626, 'time_algorithm_update': 0.01836035892977352, 'temp_loss': 0.3928551511915281, 'temp': 0.7644966847715322, 'alpha_loss': 2.9043371339290465, 'alpha': 0.3516185114606779, 'critic_loss': 374.64137964081345, 'actor_loss': 84.14340662816812, 'time_step': 0.018820699892546002, 'td_error': 44.181356521347595, 'init_value': -119.9380874633789, 'ave_value': -75.03408469228997} step=15732
2022-04-20 20:15.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.25 [info     ] CQL_20220420201015: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00035502060114988806, 'time_algorithm_update': 0.018262663779900087, 'temp_loss': 0.4114111930741901, 'temp': 0.759808219315713, 'alpha_loss': 2.8105233604447886, 'alpha': 0.34115921370467245, 'critic_loss': 375.34015874137657, 'actor_loss': 84.33186552259657, 'time_step': 0.01871650609356618, 'td_error': 45.26837511757119, 'init_value': -118.95500183105469, 'ave_value': -75.87595501908713} step=16074
2022-04-20 20:15.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.32 [info     ] CQL_20220420201015: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003591113620334201, 'time_algorithm_update': 0.018261677340457316, 'temp_loss': 0.39927453751408915, 'temp': 0.7551009203606879, 'alpha_loss': 2.6924470555015474, 'alpha': 0.33089163167434826, 'critic_loss': 377.1689466509903, 'actor_loss': 84.40387023122688, 'time_step': 0.018722266481633772, 'td_error': 44.49149554234653, 'init_value': -118.90708923339844, 'ave_value': -75.18548824099605} step=16416
2022-04-20 20:15.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.39 [info     ] CQL_20220420201015: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00035209613933897855, 'time_algorithm_update': 0.018293831083509658, 'temp_loss': 0.3835671429810991, 'temp': 0.7505501926990977, 'alpha_loss': 2.619701333910401, 'alpha': 0.3210387385902349, 'critic_loss': 378.41612234729075, 'actor_loss': 84.49867476078502, 'time_step': 0.018749505455730953, 'td_error': 43.227595342113624, 'init_value': -118.09098052978516, 'ave_value': -74.50372438908093} step=16758
2022-04-20 20:15.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:15.45 [info     ] CQL_20220420201015: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003554109941449082, 'time_algorithm_update': 0.01841652532767134, 'temp_loss': 0.3940848473906081, 'temp': 0.7460053645030796, 'alpha_loss': 2.530310082505321, 'alpha': 0.31145716262491124, 'critic_loss': 379.68898715192114, 'actor_loss': 84.68781167861314, 'time_step': 0.018876019974201047, 'td_error': 41.07102056751101, 'init_value': -115.8870620727539, 'ave_value': -74.36587702436759} step=17100
2022-04-20 20:15.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201015/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:15.46 [info     ] FQE_20220420201545: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001572025827614658, 'time_algorithm_update': 0.0021440135427268155, 'loss': 0.007160599987269436, 'time_step': 0.0023712149585585996, 'init_value': -0.431742787361145, 'ave_value': -0.3472210468472661, 'soft_opc': nan} step=166




2022-04-20 20:15.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.46 [info     ] FQE_20220420201545: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016242337514118976, 'time_algorithm_update': 0.002168527568679258, 'loss': 0.005670188541849514, 'time_step': 0.00240034798541701, 'init_value': -0.557074785232544, 'ave_value': -0.40307123741588075, 'soft_opc': nan} step=332




2022-04-20 20:15.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.47 [info     ] FQE_20220420201545: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015889880168868834, 'time_algorithm_update': 0.0021147871591958656, 'loss': 0.0053000324125210925, 'time_step': 0.002345587833818183, 'init_value': -0.6080800890922546, 'ave_value': -0.4225155448155092, 'soft_opc': nan} step=498




2022-04-20 20:15.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.47 [info     ] FQE_20220420201545: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015812465943485857, 'time_algorithm_update': 0.002100391560290233, 'loss': 0.005175948217342866, 'time_step': 0.0023306220410818077, 'init_value': -0.6845335960388184, 'ave_value': -0.4666576808033226, 'soft_opc': nan} step=664




2022-04-20 20:15.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.48 [info     ] FQE_20220420201545: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016149986221129634, 'time_algorithm_update': 0.002149831817810794, 'loss': 0.004678560037962554, 'time_step': 0.002381828894098121, 'init_value': -0.7407876253128052, 'ave_value': -0.4922822913708719, 'soft_opc': nan} step=830




2022-04-20 20:15.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.48 [info     ] FQE_20220420201545: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015682484730180488, 'time_algorithm_update': 0.002119706337710461, 'loss': 0.004370647840235517, 'time_step': 0.0023469637675457692, 'init_value': -0.7505381107330322, 'ave_value': -0.4892900483431043, 'soft_opc': nan} step=996




2022-04-20 20:15.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.49 [info     ] FQE_20220420201545: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015995875898613986, 'time_algorithm_update': 0.002084127391677305, 'loss': 0.004111718434099871, 'time_step': 0.0023113546601261, 'init_value': -0.7628563642501831, 'ave_value': -0.47932889394394984, 'soft_opc': nan} step=1162




2022-04-20 20:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.49 [info     ] FQE_20220420201545: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001563422651175993, 'time_algorithm_update': 0.002140352524906756, 'loss': 0.003870866694824254, 'time_step': 0.0023636760481868884, 'init_value': -0.8020666837692261, 'ave_value': -0.5012533039626506, 'soft_opc': nan} step=1328




2022-04-20 20:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.49 [info     ] FQE_20220420201545: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016343306346111987, 'time_algorithm_update': 0.0021560134657894275, 'loss': 0.003639111183134906, 'time_step': 0.002391493464090738, 'init_value': -0.8109437227249146, 'ave_value': -0.5116025846552205, 'soft_opc': nan} step=1494




2022-04-20 20:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.50 [info     ] FQE_20220420201545: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015891029174069324, 'time_algorithm_update': 0.0021017215338097997, 'loss': 0.003660392550560814, 'time_step': 0.0023313961833356374, 'init_value': -0.8789113759994507, 'ave_value': -0.558902136803613, 'soft_opc': nan} step=1660




2022-04-20 20:15.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.50 [info     ] FQE_20220420201545: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016272642526281886, 'time_algorithm_update': 0.0021081143114940228, 'loss': 0.003648878716869302, 'time_step': 0.0023413752935018883, 'init_value': -0.9134680032730103, 'ave_value': -0.5788983724742859, 'soft_opc': nan} step=1826




2022-04-20 20:15.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.51 [info     ] FQE_20220420201545: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015572754733533743, 'time_algorithm_update': 0.0021005868911743164, 'loss': 0.003425369105801121, 'time_step': 0.002326840377715697, 'init_value': -0.8931136727333069, 'ave_value': -0.5308694606688847, 'soft_opc': nan} step=1992




2022-04-20 20:15.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.51 [info     ] FQE_20220420201545: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015456274331334126, 'time_algorithm_update': 0.0020735220736767873, 'loss': 0.003670125271204904, 'time_step': 0.0023003687341529204, 'init_value': -0.9629830718040466, 'ave_value': -0.5696716721849264, 'soft_opc': nan} step=2158




2022-04-20 20:15.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.52 [info     ] FQE_20220420201545: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001591860529888107, 'time_algorithm_update': 0.002110044640230845, 'loss': 0.003735285579838442, 'time_step': 0.0023370464164090446, 'init_value': -0.9764596223831177, 'ave_value': -0.5700485032874051, 'soft_opc': nan} step=2324




2022-04-20 20:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.52 [info     ] FQE_20220420201545: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015411319502864974, 'time_algorithm_update': 0.002104010926671775, 'loss': 0.0038804117048787602, 'time_step': 0.0023289014057940745, 'init_value': -0.987971305847168, 'ave_value': -0.5624301914461293, 'soft_opc': nan} step=2490




2022-04-20 20:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.52 [info     ] FQE_20220420201545: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015664818775222962, 'time_algorithm_update': 0.0021434806915650883, 'loss': 0.004073602221737874, 'time_step': 0.0023694052753678286, 'init_value': -0.9722054600715637, 'ave_value': -0.5512181099663648, 'soft_opc': nan} step=2656




2022-04-20 20:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.53 [info     ] FQE_20220420201545: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016256987330425218, 'time_algorithm_update': 0.0021026665905872024, 'loss': 0.004203009383377047, 'time_step': 0.0023320784051734282, 'init_value': -1.0043491125106812, 'ave_value': -0.5507614485231471, 'soft_opc': nan} step=2822




2022-04-20 20:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.53 [info     ] FQE_20220420201545: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001564126416861293, 'time_algorithm_update': 0.0021116044147905097, 'loss': 0.004344686544963147, 'time_step': 0.0023390543029969, 'init_value': -1.0322967767715454, 'ave_value': -0.5589230527555956, 'soft_opc': nan} step=2988




2022-04-20 20:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.54 [info     ] FQE_20220420201545: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001607630626264825, 'time_algorithm_update': 0.002146150692399726, 'loss': 0.004710704184990062, 'time_step': 0.0023783129381846234, 'init_value': -1.0750365257263184, 'ave_value': -0.5825591596669878, 'soft_opc': nan} step=3154




2022-04-20 20:15.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.54 [info     ] FQE_20220420201545: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016044421368334666, 'time_algorithm_update': 0.002102633556687688, 'loss': 0.004850079594053754, 'time_step': 0.002332961702921304, 'init_value': -1.0752633810043335, 'ave_value': -0.5525094244725633, 'soft_opc': nan} step=3320




2022-04-20 20:15.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.55 [info     ] FQE_20220420201545: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015690815017884037, 'time_algorithm_update': 0.002068990684417357, 'loss': 0.0049657723000301164, 'time_step': 0.002294062131858734, 'init_value': -1.1569926738739014, 'ave_value': -0.6344509681611247, 'soft_opc': nan} step=3486




2022-04-20 20:15.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.55 [info     ] FQE_20220420201545: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001594029277204031, 'time_algorithm_update': 0.0021282377013240954, 'loss': 0.005565191769526419, 'time_step': 0.002357006072998047, 'init_value': -1.187196969985962, 'ave_value': -0.6509171829982618, 'soft_opc': nan} step=3652




2022-04-20 20:15.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.56 [info     ] FQE_20220420201545: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.000161553003701819, 'time_algorithm_update': 0.002193091863609222, 'loss': 0.005557105554609997, 'time_step': 0.002429124820663268, 'init_value': -1.237436056137085, 'ave_value': -0.6929923650582095, 'soft_opc': nan} step=3818




2022-04-20 20:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.56 [info     ] FQE_20220420201545: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016046432127435524, 'time_algorithm_update': 0.002154123352234622, 'loss': 0.006087708880238414, 'time_step': 0.002386524016598621, 'init_value': -1.2425962686538696, 'ave_value': -0.7059520434881854, 'soft_opc': nan} step=3984




2022-04-20 20:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.56 [info     ] FQE_20220420201545: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015730886574251107, 'time_algorithm_update': 0.0020748563559658557, 'loss': 0.006295923011781401, 'time_step': 0.0022996247532856033, 'init_value': -1.2929120063781738, 'ave_value': -0.7348490004754953, 'soft_opc': nan} step=4150




2022-04-20 20:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.57 [info     ] FQE_20220420201545: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016133756522672722, 'time_algorithm_update': 0.0020848986614181333, 'loss': 0.0067112717734711495, 'time_step': 0.0023146207074084915, 'init_value': -1.3498568534851074, 'ave_value': -0.7876219311695513, 'soft_opc': nan} step=4316




2022-04-20 20:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.57 [info     ] FQE_20220420201545: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015811460563935428, 'time_algorithm_update': 0.0020737059145088657, 'loss': 0.007198491059762639, 'time_step': 0.0022991996213614224, 'init_value': -1.4059977531433105, 'ave_value': -0.8243504071843114, 'soft_opc': nan} step=4482




2022-04-20 20:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.58 [info     ] FQE_20220420201545: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015886433153267367, 'time_algorithm_update': 0.002163476254566606, 'loss': 0.007564146335360156, 'time_step': 0.0023922719151140697, 'init_value': -1.463483214378357, 'ave_value': -0.9007634482447159, 'soft_opc': nan} step=4648




2022-04-20 20:15.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.58 [info     ] FQE_20220420201545: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016247077160570994, 'time_algorithm_update': 0.002118452485785427, 'loss': 0.00808292480202459, 'time_step': 0.0023541838289743446, 'init_value': -1.4803013801574707, 'ave_value': -0.9339896422794072, 'soft_opc': nan} step=4814




2022-04-20 20:15.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.59 [info     ] FQE_20220420201545: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015458428716085045, 'time_algorithm_update': 0.002072868576969009, 'loss': 0.008661879608070994, 'time_step': 0.0022989109338047995, 'init_value': -1.5234737396240234, 'ave_value': -0.9525045332876412, 'soft_opc': nan} step=4980




2022-04-20 20:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.59 [info     ] FQE_20220420201545: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015707188341991012, 'time_algorithm_update': 0.002114614808415792, 'loss': 0.009329414213600805, 'time_step': 0.0023403397525649473, 'init_value': -1.5791475772857666, 'ave_value': -0.9775354620270632, 'soft_opc': nan} step=5146




2022-04-20 20:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:15.59 [info     ] FQE_20220420201545: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015993147011262825, 'time_algorithm_update': 0.0021963119506835938, 'loss': 0.010063546428496176, 'time_step': 0.0024279772517192795, 'init_value': -1.6417688131332397, 'ave_value': -1.0435868061265094, 'soft_opc': nan} step=5312




2022-04-20 20:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.00 [info     ] FQE_20220420201545: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001597189041505377, 'time_algorithm_update': 0.0021757777914943465, 'loss': 0.010257602658779192, 'time_step': 0.00240553718015372, 'init_value': -1.6275328397750854, 'ave_value': -1.0119623689467574, 'soft_opc': nan} step=5478




2022-04-20 20:16.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.00 [info     ] FQE_20220420201545: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016029484300728305, 'time_algorithm_update': 0.0021712334759264105, 'loss': 0.010385092162717897, 'time_step': 0.0024057037859077915, 'init_value': -1.732473373413086, 'ave_value': -1.0928726063160277, 'soft_opc': nan} step=5644




2022-04-20 20:16.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.01 [info     ] FQE_20220420201545: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015565429825380625, 'time_algorithm_update': 0.0021284244146691747, 'loss': 0.011074459997948304, 'time_step': 0.0023551159594432415, 'init_value': -1.7674250602722168, 'ave_value': -1.158973084153557, 'soft_opc': nan} step=5810




2022-04-20 20:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.01 [info     ] FQE_20220420201545: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001573734972850386, 'time_algorithm_update': 0.0021043843533619343, 'loss': 0.011723422517590449, 'time_step': 0.002333570675677564, 'init_value': -1.796888828277588, 'ave_value': -1.1442792216508841, 'soft_opc': nan} step=5976




2022-04-20 20:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.02 [info     ] FQE_20220420201545: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001577024000236787, 'time_algorithm_update': 0.0021554202918546744, 'loss': 0.0124968689963143, 'time_step': 0.0023796285491391837, 'init_value': -1.8390624523162842, 'ave_value': -1.1642744083234386, 'soft_opc': nan} step=6142




2022-04-20 20:16.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.02 [info     ] FQE_20220420201545: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015716093132294803, 'time_algorithm_update': 0.002150486750775073, 'loss': 0.01249168260396088, 'time_step': 0.002376326595444277, 'init_value': -1.8007526397705078, 'ave_value': -1.1097987991891936, 'soft_opc': nan} step=6308




2022-04-20 20:16.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.03 [info     ] FQE_20220420201545: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015725428799548782, 'time_algorithm_update': 0.0020916275231235, 'loss': 0.013297869891898864, 'time_step': 0.0023206041519900403, 'init_value': -1.795565128326416, 'ave_value': -1.0951832079211132, 'soft_opc': nan} step=6474




2022-04-20 20:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.03 [info     ] FQE_20220420201545: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001608492380165192, 'time_algorithm_update': 0.0021212388233966136, 'loss': 0.01347160871990631, 'time_step': 0.0023513544036681392, 'init_value': -1.8145986795425415, 'ave_value': -1.1406181323811881, 'soft_opc': nan} step=6640




2022-04-20 20:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.03 [info     ] FQE_20220420201545: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015978497194956583, 'time_algorithm_update': 0.002110744097146643, 'loss': 0.013848129294865978, 'time_step': 0.0023397465786301947, 'init_value': -1.8740057945251465, 'ave_value': -1.2083784167126224, 'soft_opc': nan} step=6806




2022-04-20 20:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.04 [info     ] FQE_20220420201545: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015735913472003247, 'time_algorithm_update': 0.002145099352641278, 'loss': 0.013793311022216431, 'time_step': 0.0023744680795324854, 'init_value': -1.883885383605957, 'ave_value': -1.2212884277986311, 'soft_opc': nan} step=6972




2022-04-20 20:16.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.04 [info     ] FQE_20220420201545: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015782879059573254, 'time_algorithm_update': 0.002130145049956908, 'loss': 0.014298137339078877, 'time_step': 0.002358719527003277, 'init_value': -1.9152226448059082, 'ave_value': -1.2374161759122937, 'soft_opc': nan} step=7138




2022-04-20 20:16.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.05 [info     ] FQE_20220420201545: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015801263142781085, 'time_algorithm_update': 0.0021040410880582877, 'loss': 0.01493944301471922, 'time_step': 0.002332474811967597, 'init_value': -1.9765286445617676, 'ave_value': -1.3220541608863854, 'soft_opc': nan} step=7304




2022-04-20 20:16.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.05 [info     ] FQE_20220420201545: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016154438616281533, 'time_algorithm_update': 0.0021539179675550348, 'loss': 0.015245720844418484, 'time_step': 0.002384462988520243, 'init_value': -1.9606882333755493, 'ave_value': -1.2850959955488106, 'soft_opc': nan} step=7470




2022-04-20 20:16.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.06 [info     ] FQE_20220420201545: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016234007226415426, 'time_algorithm_update': 0.0022161265453660346, 'loss': 0.01605149883677319, 'time_step': 0.002449343003422381, 'init_value': -2.0218257904052734, 'ave_value': -1.34003488786334, 'soft_opc': nan} step=7636




2022-04-20 20:16.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.06 [info     ] FQE_20220420201545: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015654477728418558, 'time_algorithm_update': 0.0020971226404948406, 'loss': 0.016597395844151635, 'time_step': 0.0023247061005557874, 'init_value': -2.099318504333496, 'ave_value': -1.4190150993733524, 'soft_opc': nan} step=7802




2022-04-20 20:16.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.07 [info     ] FQE_20220420201545: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016045139496584973, 'time_algorithm_update': 0.0021993467606693864, 'loss': 0.016808783637600983, 'time_step': 0.002430709011583443, 'init_value': -2.098212242126465, 'ave_value': -1.4174467692982304, 'soft_opc': nan} step=7968




2022-04-20 20:16.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.07 [info     ] FQE_20220420201545: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016003344432417168, 'time_algorithm_update': 0.0021401500127401695, 'loss': 0.016769406882238137, 'time_step': 0.00237248317304864, 'init_value': -2.1692306995391846, 'ave_value': -1.4951680750602874, 'soft_opc': nan} step=8134




2022-04-20 20:16.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:16.07 [info     ] FQE_20220420201545: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015794943614178393, 'time_algorithm_update': 0.002165471214845956, 'loss': 0.01776347860360114, 'time_step': 0.0023912406829466304, 'init_value': -2.1896250247955322, 'ave_value': -1.5399596970905927, 'soft_opc': nan} step=8300




2022-04-20 20:16.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201545/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:16.08 [info     ] Directory is created at d3rlpy_logs/FQE_20220420201608
2022-04-20 20:16.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:16.08 [debug    ] Building models...
2022-04-20 20:16.08 [debug    ] Models have been built.
2022-04-20 20:16.08 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420201608/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:16.09 [info     ] FQE_20220420201608: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001596399517946465, 'time_algorithm_update': 0.0021263957023620605, 'loss': 0.02133376604801607, 'time_step': 0.002360404923904774, 'init_value': -1.075943112373352, 'ave_value': -1.0766228139534726, 'soft_opc': nan} step=344




2022-04-20 20:16.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.10 [info     ] FQE_20220420201608: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015846934429434843, 'time_algorithm_update': 0.002093611761581066, 'loss': 0.019718254992175242, 'time_step': 0.0023210436798805413, 'init_value': -1.8821330070495605, 'ave_value': -1.9207360692389377, 'soft_opc': nan} step=688




2022-04-20 20:16.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.10 [info     ] FQE_20220420201608: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001605236253073049, 'time_algorithm_update': 0.0021946977737338043, 'loss': 0.022336585907438814, 'time_step': 0.0024258023084596145, 'init_value': -2.981640338897705, 'ave_value': -3.0565819912397107, 'soft_opc': nan} step=1032




2022-04-20 20:16.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.11 [info     ] FQE_20220420201608: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001605832299520803, 'time_algorithm_update': 0.0021282683971316314, 'loss': 0.02586408144101327, 'time_step': 0.002360465221626814, 'init_value': -3.774575710296631, 'ave_value': -3.882975979270162, 'soft_opc': nan} step=1376




2022-04-20 20:16.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.12 [info     ] FQE_20220420201608: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015793359556863474, 'time_algorithm_update': 0.0020888731923214224, 'loss': 0.032144134739673765, 'time_step': 0.002318161864613378, 'init_value': -4.935218334197998, 'ave_value': -5.098665860510088, 'soft_opc': nan} step=1720




2022-04-20 20:16.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.13 [info     ] FQE_20220420201608: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001634685106055681, 'time_algorithm_update': 0.0021141940771147263, 'loss': 0.038466450904474356, 'time_step': 0.0023483737956645876, 'init_value': -5.697871208190918, 'ave_value': -5.897270131325937, 'soft_opc': nan} step=2064




2022-04-20 20:16.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.14 [info     ] FQE_20220420201608: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015956925791363384, 'time_algorithm_update': 0.0021142037801964338, 'loss': 0.04707279428298217, 'time_step': 0.002343893744224726, 'init_value': -6.6683454513549805, 'ave_value': -6.909356350700061, 'soft_opc': nan} step=2408




2022-04-20 20:16.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.15 [info     ] FQE_20220420201608: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015763973080834678, 'time_algorithm_update': 0.002110587996105815, 'loss': 0.05803393576900626, 'time_step': 0.0023418498593707417, 'init_value': -7.421805381774902, 'ave_value': -7.739552990461255, 'soft_opc': nan} step=2752




2022-04-20 20:16.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.16 [info     ] FQE_20220420201608: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015880063522693722, 'time_algorithm_update': 0.0021105415599290715, 'loss': 0.06708641535992366, 'time_step': 0.002341482528420382, 'init_value': -8.045912742614746, 'ave_value': -8.426146173906757, 'soft_opc': nan} step=3096




2022-04-20 20:16.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.17 [info     ] FQE_20220420201608: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016101848247439363, 'time_algorithm_update': 0.002122066741765932, 'loss': 0.07985169965730504, 'time_step': 0.0023553475391033083, 'init_value': -8.982245445251465, 'ave_value': -9.406740166206617, 'soft_opc': nan} step=3440




2022-04-20 20:16.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.18 [info     ] FQE_20220420201608: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001566604126331418, 'time_algorithm_update': 0.0021144130895304125, 'loss': 0.09112627629927078, 'time_step': 0.002344096122786056, 'init_value': -9.598787307739258, 'ave_value': -10.04007350136568, 'soft_opc': nan} step=3784




2022-04-20 20:16.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.18 [info     ] FQE_20220420201608: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016137125880219216, 'time_algorithm_update': 0.002103875542795935, 'loss': 0.10637219503608554, 'time_step': 0.0023388516071230865, 'init_value': -10.403456687927246, 'ave_value': -10.829267995583045, 'soft_opc': nan} step=4128




2022-04-20 20:16.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.19 [info     ] FQE_20220420201608: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015983609266059342, 'time_algorithm_update': 0.002133607864379883, 'loss': 0.11791605526733018, 'time_step': 0.0023621521716894107, 'init_value': -10.916040420532227, 'ave_value': -11.367424168189366, 'soft_opc': nan} step=4472




2022-04-20 20:16.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.20 [info     ] FQE_20220420201608: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016115571177282998, 'time_algorithm_update': 0.0020891836909360663, 'loss': 0.13215924763164028, 'time_step': 0.0023209695206132044, 'init_value': -11.700736999511719, 'ave_value': -12.130650355150033, 'soft_opc': nan} step=4816




2022-04-20 20:16.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.21 [info     ] FQE_20220420201608: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015529712965322095, 'time_algorithm_update': 0.0020440193109734113, 'loss': 0.15101817995851297, 'time_step': 0.0022684945616611215, 'init_value': -12.369861602783203, 'ave_value': -12.85600325953853, 'soft_opc': nan} step=5160




2022-04-20 20:16.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.22 [info     ] FQE_20220420201608: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016404029934905296, 'time_algorithm_update': 0.0021491736866707024, 'loss': 0.16287426690863402, 'time_step': 0.002385479073191798, 'init_value': -12.898077964782715, 'ave_value': -13.413851660033604, 'soft_opc': nan} step=5504




2022-04-20 20:16.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.23 [info     ] FQE_20220420201608: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015821151955183163, 'time_algorithm_update': 0.0021053885304650596, 'loss': 0.18617496403139952, 'time_step': 0.0023359469203061835, 'init_value': -13.579582214355469, 'ave_value': -14.13803761322219, 'soft_opc': nan} step=5848




2022-04-20 20:16.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.24 [info     ] FQE_20220420201608: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015845686890358148, 'time_algorithm_update': 0.0021044618861619816, 'loss': 0.20843795684794353, 'time_step': 0.0023357279078904973, 'init_value': -14.093053817749023, 'ave_value': -14.719836418827375, 'soft_opc': nan} step=6192




2022-04-20 20:16.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.25 [info     ] FQE_20220420201608: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015462761701539506, 'time_algorithm_update': 0.0020504434441411218, 'loss': 0.22559804839710162, 'time_step': 0.002271914204885793, 'init_value': -14.797380447387695, 'ave_value': -15.47448054557448, 'soft_opc': nan} step=6536




2022-04-20 20:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.25 [info     ] FQE_20220420201608: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015927123468975689, 'time_algorithm_update': 0.002091605995976648, 'loss': 0.24892214925548192, 'time_step': 0.002321489328561827, 'init_value': -15.267398834228516, 'ave_value': -16.116334268692377, 'soft_opc': nan} step=6880




2022-04-20 20:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.26 [info     ] FQE_20220420201608: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015781438627908396, 'time_algorithm_update': 0.002068964547889177, 'loss': 0.2683758395074239, 'time_step': 0.002297092315762542, 'init_value': -15.90324592590332, 'ave_value': -16.79972122977446, 'soft_opc': nan} step=7224




2022-04-20 20:16.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.27 [info     ] FQE_20220420201608: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001566742741784384, 'time_algorithm_update': 0.0020811086477235305, 'loss': 0.28442246104092445, 'time_step': 0.002310954554136409, 'init_value': -16.145183563232422, 'ave_value': -17.037134042328542, 'soft_opc': nan} step=7568




2022-04-20 20:16.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.28 [info     ] FQE_20220420201608: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015771943469380223, 'time_algorithm_update': 0.002051556526228439, 'loss': 0.29856378748893825, 'time_step': 0.0022812894610471503, 'init_value': -16.526323318481445, 'ave_value': -17.610278325064762, 'soft_opc': nan} step=7912




2022-04-20 20:16.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.29 [info     ] FQE_20220420201608: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016032679136409315, 'time_algorithm_update': 0.0021591893462247626, 'loss': 0.3215828073849945, 'time_step': 0.002392399449681127, 'init_value': -16.792266845703125, 'ave_value': -18.063194480484672, 'soft_opc': nan} step=8256




2022-04-20 20:16.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.30 [info     ] FQE_20220420201608: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015866687131482502, 'time_algorithm_update': 0.0021578600240308183, 'loss': 0.3493837544662055, 'time_step': 0.0023909654728201933, 'init_value': -17.44692611694336, 'ave_value': -18.851565219394796, 'soft_opc': nan} step=8600




2022-04-20 20:16.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.31 [info     ] FQE_20220420201608: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015974668569343034, 'time_algorithm_update': 0.0020906523216602415, 'loss': 0.3724109651033528, 'time_step': 0.0023228013238241504, 'init_value': -17.962541580200195, 'ave_value': -19.580878245562047, 'soft_opc': nan} step=8944




2022-04-20 20:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.32 [info     ] FQE_20220420201608: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015726616216260334, 'time_algorithm_update': 0.0021136534768481587, 'loss': 0.39355662156459553, 'time_step': 0.0023414236168528713, 'init_value': -18.50525665283203, 'ave_value': -20.133521703234663, 'soft_opc': nan} step=9288




2022-04-20 20:16.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.32 [info     ] FQE_20220420201608: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001600495604581611, 'time_algorithm_update': 0.0021244828091111292, 'loss': 0.42263171029125535, 'time_step': 0.002356863992158757, 'init_value': -19.038360595703125, 'ave_value': -20.974780552505372, 'soft_opc': nan} step=9632




2022-04-20 20:16.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.33 [info     ] FQE_20220420201608: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016164225201274074, 'time_algorithm_update': 0.0021225040735200393, 'loss': 0.43619709380165955, 'time_step': 0.0023579576680826586, 'init_value': -19.57052993774414, 'ave_value': -21.58597131774232, 'soft_opc': nan} step=9976




2022-04-20 20:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.34 [info     ] FQE_20220420201608: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001621842384338379, 'time_algorithm_update': 0.0021534624487854715, 'loss': 0.4488283722813046, 'time_step': 0.002390483784121136, 'init_value': -20.091636657714844, 'ave_value': -22.295610896263035, 'soft_opc': nan} step=10320




2022-04-20 20:16.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.35 [info     ] FQE_20220420201608: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001602928305781165, 'time_algorithm_update': 0.0021249055862426758, 'loss': 0.46470524504954036, 'time_step': 0.00235819747281629, 'init_value': -20.34917449951172, 'ave_value': -22.713283384544354, 'soft_opc': nan} step=10664




2022-04-20 20:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.36 [info     ] FQE_20220420201608: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016247047934421274, 'time_algorithm_update': 0.0021417771661004356, 'loss': 0.48123194469507174, 'time_step': 0.0023783299812050753, 'init_value': -20.40804100036621, 'ave_value': -23.021707319837432, 'soft_opc': nan} step=11008




2022-04-20 20:16.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.37 [info     ] FQE_20220420201608: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016234433928201365, 'time_algorithm_update': 0.0021403605161711228, 'loss': 0.49035796726685627, 'time_step': 0.0023757988630339157, 'init_value': -20.786312103271484, 'ave_value': -23.539158202130515, 'soft_opc': nan} step=11352




2022-04-20 20:16.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.38 [info     ] FQE_20220420201608: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016101155170174532, 'time_algorithm_update': 0.002106810031935226, 'loss': 0.5062832588893037, 'time_step': 0.002340835887332295, 'init_value': -20.808692932128906, 'ave_value': -23.73241390215384, 'soft_opc': nan} step=11696




2022-04-20 20:16.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.39 [info     ] FQE_20220420201608: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015921994697215946, 'time_algorithm_update': 0.0021073561768199123, 'loss': 0.5217537634334591, 'time_step': 0.002336870792300202, 'init_value': -21.395904541015625, 'ave_value': -24.421273834103935, 'soft_opc': nan} step=12040




2022-04-20 20:16.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.40 [info     ] FQE_20220420201608: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016048550605773926, 'time_algorithm_update': 0.0021243289459583372, 'loss': 0.534075174816386, 'time_step': 0.0023576936056447584, 'init_value': -21.728565216064453, 'ave_value': -24.897065143166362, 'soft_opc': nan} step=12384




2022-04-20 20:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.40 [info     ] FQE_20220420201608: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001595290594322737, 'time_algorithm_update': 0.002088397048240484, 'loss': 0.5466137202781474, 'time_step': 0.0023191619751065278, 'init_value': -21.91291618347168, 'ave_value': -25.359180985941542, 'soft_opc': nan} step=12728




2022-04-20 20:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.41 [info     ] FQE_20220420201608: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015957064406816349, 'time_algorithm_update': 0.0021660272465195765, 'loss': 0.5541403796361369, 'time_step': 0.0023972807928573253, 'init_value': -22.23755645751953, 'ave_value': -25.786300229193927, 'soft_opc': nan} step=13072




2022-04-20 20:16.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.42 [info     ] FQE_20220420201608: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015996084656826284, 'time_algorithm_update': 0.0021088282729304114, 'loss': 0.5653398135666151, 'time_step': 0.002341353616049123, 'init_value': -21.98053741455078, 'ave_value': -25.721950054410343, 'soft_opc': nan} step=13416




2022-04-20 20:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.43 [info     ] FQE_20220420201608: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016065808229668197, 'time_algorithm_update': 0.0021407444809758386, 'loss': 0.5671797332754576, 'time_step': 0.00237414933914362, 'init_value': -21.90817642211914, 'ave_value': -25.793494406235112, 'soft_opc': nan} step=13760




2022-04-20 20:16.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.44 [info     ] FQE_20220420201608: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016318365584972293, 'time_algorithm_update': 0.002144561257473258, 'loss': 0.5710063970398678, 'time_step': 0.002381852892942207, 'init_value': -22.205394744873047, 'ave_value': -26.237644933123846, 'soft_opc': nan} step=14104




2022-04-20 20:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.45 [info     ] FQE_20220420201608: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015915964925011924, 'time_algorithm_update': 0.002124999844750693, 'loss': 0.5767688206126264, 'time_step': 0.0023591158001921896, 'init_value': -21.80629539489746, 'ave_value': -26.011394752145886, 'soft_opc': nan} step=14448




2022-04-20 20:16.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.46 [info     ] FQE_20220420201608: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015897321146587993, 'time_algorithm_update': 0.002071519230687341, 'loss': 0.5787822713988812, 'time_step': 0.002302688914676045, 'init_value': -22.16452407836914, 'ave_value': -26.405598933243, 'soft_opc': nan} step=14792




2022-04-20 20:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.47 [info     ] FQE_20220420201608: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016152304272318995, 'time_algorithm_update': 0.0021193893842918927, 'loss': 0.588382679422191, 'time_step': 0.0023531033549197885, 'init_value': -22.234418869018555, 'ave_value': -26.834104133202686, 'soft_opc': nan} step=15136




2022-04-20 20:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.47 [info     ] FQE_20220420201608: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001589773699294689, 'time_algorithm_update': 0.0020657209462897723, 'loss': 0.5927695911485962, 'time_step': 0.0022948613000470536, 'init_value': -22.269445419311523, 'ave_value': -26.87417241784635, 'soft_opc': nan} step=15480




2022-04-20 20:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.48 [info     ] FQE_20220420201608: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001610572948012241, 'time_algorithm_update': 0.0021251488563626313, 'loss': 0.596791595864911, 'time_step': 0.0023575875648232394, 'init_value': -22.43446922302246, 'ave_value': -26.946469304764324, 'soft_opc': nan} step=15824




2022-04-20 20:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.49 [info     ] FQE_20220420201608: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.000162053246830785, 'time_algorithm_update': 0.0021232165569482846, 'loss': 0.5999059061584777, 'time_step': 0.002358246681302093, 'init_value': -22.335718154907227, 'ave_value': -26.902797717513803, 'soft_opc': nan} step=16168




2022-04-20 20:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.50 [info     ] FQE_20220420201608: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015671516573706338, 'time_algorithm_update': 0.0021696513475373733, 'loss': 0.6054706278711904, 'time_step': 0.002400978360065194, 'init_value': -22.410354614257812, 'ave_value': -27.004677425731426, 'soft_opc': nan} step=16512




2022-04-20 20:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.51 [info     ] FQE_20220420201608: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016274077947749647, 'time_algorithm_update': 0.002108320247295291, 'loss': 0.6176407303347058, 'time_step': 0.002348544985749001, 'init_value': -22.684856414794922, 'ave_value': -27.432655363485452, 'soft_opc': nan} step=16856




2022-04-20 20:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:16.52 [info     ] FQE_20220420201608: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015974668569343034, 'time_algorithm_update': 0.0020788270373677097, 'loss': 0.6330254603242285, 'time_step': 0.002310403557710869, 'init_value': -22.876008987426758, 'ave_value': -27.472790211013326, 'soft_opc': nan} step=17200




2022-04-20 20:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420201608/model_17200.pt
search iteration:  35
using hyper params:  [0.009927268676484895, 0.008497451257493138, 3.004213837430974e-05, 5]
2022-04-20 20:16.52 [debug    ] RoundIterator is selected.
2022-04-20 20:16.52 [info     ] Directory is created at d3rlpy_logs/CQL_20220420201652
2022-04-20 20:16.52 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:16.52 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:16.52 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420201652/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009927268676484895, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:16.58 [info     ] CQL_20220420201652: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.0003525653080633509, 'time_algorithm_update': 0.017850912802400646, 'temp_loss': 4.444232337656077, 'temp': 0.9955432083871629, 'alpha_loss': -13.633773385432729, 'alpha': 1.0156770254436291, 'critic_loss': 35.03164031491642, 'actor_loss': 4.173707905742857, 'time_step': 0.018301393553527476, 'td_error': 3.782059613295485, 'init_value': -10.8982515335083, 'ave_value': -6.503840703664029} step=342
2022-04-20 20:16.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.05 [info     ] CQL_20220420201652: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00034952024270219414, 'time_algorithm_update': 0.018020699595847324, 'temp_loss': 3.646232833639223, 'temp': 0.9862783061482056, 'alpha_loss': -4.506926719382492, 'alpha': 1.0357375932715789, 'critic_loss': 29.35838710494906, 'actor_loss': 9.85713819732443, 'time_step': 0.018470287322998047, 'td_error': 5.9455381612951, 'init_value': -19.22762107849121, 'ave_value': -10.819983298600324} step=684
2022-04-20 20:17.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.12 [info     ] CQL_20220420201652: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00035528620781257136, 'time_algorithm_update': 0.017929874665555897, 'temp_loss': 2.9209639538101286, 'temp': 0.9781620899836222, 'alpha_loss': 0.03544030665373702, 'alpha': 1.0429914639010067, 'critic_loss': 53.25037733156081, 'actor_loss': 16.116435474819607, 'time_step': 0.018385816038700573, 'td_error': 10.338640708447752, 'init_value': -28.626117706298828, 'ave_value': -16.25675767698602} step=1026
2022-04-20 20:17.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.18 [info     ] CQL_20220420201652: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003551656042623241, 'time_algorithm_update': 0.018072100410684508, 'temp_loss': 2.3819585536655628, 'temp': 0.9707514716867816, 'alpha_loss': 3.3560805986390303, 'alpha': 1.034227394569687, 'critic_loss': 82.68831975836503, 'actor_loss': 22.39060536323235, 'time_step': 0.01852571127707498, 'td_error': 16.78245488234366, 'init_value': -37.82693862915039, 'ave_value': -21.178002096254815} step=1368
2022-04-20 20:17.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.25 [info     ] CQL_20220420201652: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00035516490713197586, 'time_algorithm_update': 0.017915163820947124, 'temp_loss': 1.9823417851799412, 'temp': 0.9639452629619174, 'alpha_loss': 5.770117924227352, 'alpha': 1.0080353018136052, 'critic_loss': 117.63207445646587, 'actor_loss': 28.631752148009184, 'time_step': 0.018371324790151494, 'td_error': 18.18453518265779, 'init_value': -46.039207458496094, 'ave_value': -25.91928001676594} step=1710
2022-04-20 20:17.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.31 [info     ] CQL_20220420201652: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.000353869638944927, 'time_algorithm_update': 0.01798819868188155, 'temp_loss': 1.6457240077835775, 'temp': 0.9576206292673858, 'alpha_loss': 7.541797480387994, 'alpha': 0.9713539638017353, 'critic_loss': 154.34583646093893, 'actor_loss': 34.570135596202825, 'time_step': 0.018441014122544674, 'td_error': 23.8959549291606, 'init_value': -54.448036193847656, 'ave_value': -31.744459148724605} step=2052
2022-04-20 20:17.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.38 [info     ] CQL_20220420201652: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003505994004812854, 'time_algorithm_update': 0.017928775290996707, 'temp_loss': 1.353163252187054, 'temp': 0.9516885575146703, 'alpha_loss': 8.99204000673796, 'alpha': 0.931020005230318, 'critic_loss': 194.02879503038196, 'actor_loss': 40.23818533044113, 'time_step': 0.01837888028886583, 'td_error': 33.01500500593963, 'init_value': -63.7230110168457, 'ave_value': -35.65896477746802} step=2394
2022-04-20 20:17.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.44 [info     ] CQL_20220420201652: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003596091131020708, 'time_algorithm_update': 0.01794709308802733, 'temp_loss': 1.0934198475633448, 'temp': 0.9462206503104048, 'alpha_loss': 10.382435906003094, 'alpha': 0.8913389906200052, 'critic_loss': 238.68838956063254, 'actor_loss': 46.02292606286835, 'time_step': 0.018404369465788904, 'td_error': 43.51908427908201, 'init_value': -73.04142761230469, 'ave_value': -41.4123359489065} step=2736
2022-04-20 20:17.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.51 [info     ] CQL_20220420201652: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003508343334086457, 'time_algorithm_update': 0.017852040062173766, 'temp_loss': 0.8595107711459461, 'temp': 0.9413202549630438, 'alpha_loss': 11.711805729838142, 'alpha': 0.8529580981410735, 'critic_loss': 289.3259956850643, 'actor_loss': 51.920632189477395, 'time_step': 0.018298685202124524, 'td_error': 49.92714895247862, 'init_value': -82.65006256103516, 'ave_value': -46.75286773182385} step=3078
2022-04-20 20:17.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:17.57 [info     ] CQL_20220420201652: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00035232688948424936, 'time_algorithm_update': 0.018042150296662982, 'temp_loss': 0.6415960249540053, 'temp': 0.9369070678077943, 'alpha_loss': 13.252078250137686, 'alpha': 0.817275877933056, 'critic_loss': 346.36957724052564, 'actor_loss': 57.857672730384515, 'time_step': 0.018493471089859454, 'td_error': 93.07799937749046, 'init_value': -92.96949768066406, 'ave_value': -52.78580344422779} step=3420
2022-04-20 20:17.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.04 [info     ] CQL_20220420201652: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003529745235777738, 'time_algorithm_update': 0.017935991984361795, 'temp_loss': 0.44347415323102335, 'temp': 0.9334489595123202, 'alpha_loss': 14.578261771397283, 'alpha': 0.7822391802107381, 'critic_loss': 414.16948472407825, 'actor_loss': 63.93105007193939, 'time_step': 0.018389341426871673, 'td_error': 112.60483793853547, 'init_value': -101.73957061767578, 'ave_value': -56.308893879857415} step=3762
2022-04-20 20:18.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.10 [info     ] CQL_20220420201652: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003553796232792369, 'time_algorithm_update': 0.018031537184241223, 'temp_loss': 0.23510689449044522, 'temp': 0.930979135092239, 'alpha_loss': 17.604886872029443, 'alpha': 0.7482737440463395, 'critic_loss': 498.3802716885394, 'actor_loss': 71.0661874737656, 'time_step': 0.018485733640124225, 'td_error': 251.1895067056954, 'init_value': -117.90213775634766, 'ave_value': -64.72174479267215} step=4104
2022-04-20 20:18.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.17 [info     ] CQL_20220420201652: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003532610441509046, 'time_algorithm_update': 0.018092825398807636, 'temp_loss': 0.08633549235303674, 'temp': 0.9295277313182229, 'alpha_loss': 20.30239694996884, 'alpha': 0.7151805165915461, 'critic_loss': 601.615222774751, 'actor_loss': 78.34230389511376, 'time_step': 0.0185443093205056, 'td_error': 277.6308697573721, 'init_value': -137.11782836914062, 'ave_value': -71.92408056051747} step=4446
2022-04-20 20:18.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.24 [info     ] CQL_20220420201652: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003627001890662121, 'time_algorithm_update': 0.01811499902379443, 'temp_loss': 0.10852538454428054, 'temp': 0.928567353744953, 'alpha_loss': 13.719057126351965, 'alpha': 0.6893677167725145, 'critic_loss': 683.1951367116114, 'actor_loss': 82.39371184856572, 'time_step': 0.018577848261559917, 'td_error': 205.87410254108673, 'init_value': -142.81576538085938, 'ave_value': -75.57216180728899} step=4788
2022-04-20 20:18.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.30 [info     ] CQL_20220420201652: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00034969940520169444, 'time_algorithm_update': 0.01796991713562904, 'temp_loss': 0.14692813803970117, 'temp': 0.927116780950312, 'alpha_loss': 12.365186784699647, 'alpha': 0.669468212894529, 'critic_loss': 746.1178192674068, 'actor_loss': 86.8177309315107, 'time_step': 0.018416942908749942, 'td_error': 179.16438833064967, 'init_value': -148.03787231445312, 'ave_value': -77.66588694161592} step=5130
2022-04-20 20:18.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.37 [info     ] CQL_20220420201652: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00034718067325346653, 'time_algorithm_update': 0.017949559535199437, 'temp_loss': 0.05725950989545437, 'temp': 0.9255441695626019, 'alpha_loss': 11.879730121434083, 'alpha': 0.6515331920127423, 'critic_loss': 792.0652164213839, 'actor_loss': 90.05502736498738, 'time_step': 0.018393694308766147, 'td_error': 155.89342177417063, 'init_value': -160.029541015625, 'ave_value': -81.91595476609562} step=5472
2022-04-20 20:18.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.43 [info     ] CQL_20220420201652: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.00035135439264844036, 'time_algorithm_update': 0.01796047520219234, 'temp_loss': 0.01609040709615451, 'temp': 0.9250947731977318, 'alpha_loss': 12.304855526539317, 'alpha': 0.6324748750667126, 'critic_loss': 824.5152802049068, 'actor_loss': 93.30806029470344, 'time_step': 0.018410750299866437, 'td_error': 157.42470578396276, 'init_value': -164.05531311035156, 'ave_value': -83.2615381021415} step=5814
2022-04-20 20:18.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.50 [info     ] CQL_20220420201652: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00034938569654498187, 'time_algorithm_update': 0.017982903279756244, 'temp_loss': -0.07174605967161077, 'temp': 0.92557298161133, 'alpha_loss': 12.321441332499186, 'alpha': 0.61357732247888, 'critic_loss': 851.6673685709635, 'actor_loss': 95.73885880855092, 'time_step': 0.018430566230015447, 'td_error': 150.0758945548063, 'init_value': -164.62489318847656, 'ave_value': -83.96586013713936} step=6156
2022-04-20 20:18.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:18.56 [info     ] CQL_20220420201652: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003426353833828753, 'time_algorithm_update': 0.017978599894116497, 'temp_loss': -0.0810757970807758, 'temp': 0.9270462508787188, 'alpha_loss': 11.048972345932185, 'alpha': 0.5951758799148582, 'critic_loss': 881.3686055858233, 'actor_loss': 98.19994079857541, 'time_step': 0.018419710516232497, 'td_error': 132.4289882858758, 'init_value': -172.20706176757812, 'ave_value': -86.94807250524526} step=6498
2022-04-20 20:18.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.03 [info     ] CQL_20220420201652: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003524321561668351, 'time_algorithm_update': 0.01786554487128007, 'temp_loss': -0.10668119125351397, 'temp': 0.9293111818924285, 'alpha_loss': 10.228584672972472, 'alpha': 0.5784158446983985, 'critic_loss': 900.4573091205798, 'actor_loss': 100.0465196085255, 'time_step': 0.018317848618267574, 'td_error': 104.58828723518768, 'init_value': -174.1254425048828, 'ave_value': -89.2922070369054} step=6840
2022-04-20 20:19.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.09 [info     ] CQL_20220420201652: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00035456328364143593, 'time_algorithm_update': 0.017892713435212072, 'temp_loss': -0.10111240841099742, 'temp': 0.9314287060882613, 'alpha_loss': 10.451179854353967, 'alpha': 0.5619404760369083, 'critic_loss': 916.8915328533329, 'actor_loss': 101.84732167204918, 'time_step': 0.018345245840953806, 'td_error': 136.2004144331249, 'init_value': -173.8968505859375, 'ave_value': -89.1143411541116} step=7182
2022-04-20 20:19.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.16 [info     ] CQL_20220420201652: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00035358660402353743, 'time_algorithm_update': 0.01797861801950555, 'temp_loss': -0.13701417760062984, 'temp': 0.9346577082461084, 'alpha_loss': 10.101773494865462, 'alpha': 0.5451853158529739, 'critic_loss': 935.9522794310809, 'actor_loss': 103.83578214589615, 'time_step': 0.01843371307640745, 'td_error': 123.47418815416087, 'init_value': -177.00531005859375, 'ave_value': -92.52227593381932} step=7524
2022-04-20 20:19.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.22 [info     ] CQL_20220420201652: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003507255810743187, 'time_algorithm_update': 0.01789857490718016, 'temp_loss': -0.14988414608268885, 'temp': 0.9375712949978677, 'alpha_loss': 9.994422746680634, 'alpha': 0.5289224152676544, 'critic_loss': 951.3309406481291, 'actor_loss': 105.19496480484455, 'time_step': 0.018349090514824404, 'td_error': 118.09114606860417, 'init_value': -174.3909912109375, 'ave_value': -91.31211474463478} step=7866
2022-04-20 20:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.29 [info     ] CQL_20220420201652: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003478248216952497, 'time_algorithm_update': 0.01789403449722201, 'temp_loss': -0.16292393804467908, 'temp': 0.9416177000915795, 'alpha_loss': 9.812374191674573, 'alpha': 0.5128678430242148, 'critic_loss': 964.2943763063665, 'actor_loss': 106.50186732777378, 'time_step': 0.018339640912953873, 'td_error': 148.21890867140036, 'init_value': -184.8107452392578, 'ave_value': -94.50850030293907} step=8208
2022-04-20 20:19.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.35 [info     ] CQL_20220420201652: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.000354769634224518, 'time_algorithm_update': 0.01790839607952631, 'temp_loss': -0.18130080823443437, 'temp': 0.9458447858604074, 'alpha_loss': 9.826795141599332, 'alpha': 0.4968435108138804, 'critic_loss': 979.9155207405313, 'actor_loss': 107.88183959604007, 'time_step': 0.01836192398740534, 'td_error': 129.04141420224232, 'init_value': -180.33340454101562, 'ave_value': -94.2150432158181} step=8550
2022-04-20 20:19.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.42 [info     ] CQL_20220420201652: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003553475552832174, 'time_algorithm_update': 0.01776986861089517, 'temp_loss': -0.20118642209522558, 'temp': 0.9508402358021653, 'alpha_loss': 10.048461396791781, 'alpha': 0.48108798361312577, 'critic_loss': 988.3863168459886, 'actor_loss': 108.94495980781421, 'time_step': 0.01822529271332144, 'td_error': 136.65484551934807, 'init_value': -182.1782684326172, 'ave_value': -96.23671806919079} step=8892
2022-04-20 20:19.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.48 [info     ] CQL_20220420201652: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00035664212633991797, 'time_algorithm_update': 0.017897608684517487, 'temp_loss': -0.23167575381108022, 'temp': 0.9559478322316332, 'alpha_loss': 9.98300354104293, 'alpha': 0.46517938192476305, 'critic_loss': 1000.18168417055, 'actor_loss': 110.18133578384132, 'time_step': 0.018352584532129835, 'td_error': 149.50780472636677, 'init_value': -182.1796417236328, 'ave_value': -96.31274268337584} step=9234
2022-04-20 20:19.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:19.55 [info     ] CQL_20220420201652: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003486153675101654, 'time_algorithm_update': 0.01782107492636519, 'temp_loss': -0.13616516089273822, 'temp': 0.9609989227607236, 'alpha_loss': 9.769739941546792, 'alpha': 0.45045984828332714, 'critic_loss': 1015.3593098601402, 'actor_loss': 111.20249945657295, 'time_step': 0.018267570183291074, 'td_error': 192.2511198938773, 'init_value': -184.2460479736328, 'ave_value': -98.48460098105306} step=9576
2022-04-20 20:19.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.01 [info     ] CQL_20220420201652: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003490894161469755, 'time_algorithm_update': 0.017879513969198305, 'temp_loss': -0.17163631316731898, 'temp': 0.9651671543455961, 'alpha_loss': 9.994235285541468, 'alpha': 0.4356423453455083, 'critic_loss': 1027.1669450726426, 'actor_loss': 112.40628685309873, 'time_step': 0.01832635151712518, 'td_error': 166.6219851993487, 'init_value': -186.31472778320312, 'ave_value': -98.91604717813735} step=9918
2022-04-20 20:20.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.08 [info     ] CQL_20220420201652: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003596202671876428, 'time_algorithm_update': 0.01803261843341136, 'temp_loss': -0.10469162022444414, 'temp': 0.9685508173111587, 'alpha_loss': 9.862127983082107, 'alpha': 0.4214224402318921, 'critic_loss': 1041.4651753386559, 'actor_loss': 113.48384156701161, 'time_step': 0.018493535922981842, 'td_error': 150.60361848695226, 'init_value': -180.9574737548828, 'ave_value': -100.14263829671182} step=10260
2022-04-20 20:20.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.15 [info     ] CQL_20220420201652: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003498444083141305, 'time_algorithm_update': 0.017961226011577406, 'temp_loss': -0.07947184601853118, 'temp': 0.9709497400891711, 'alpha_loss': 9.626415723945662, 'alpha': 0.4075199982576203, 'critic_loss': 1059.7594843635782, 'actor_loss': 114.51703059324744, 'time_step': 0.018411077951130114, 'td_error': 182.5907583133038, 'init_value': -188.81784057617188, 'ave_value': -102.53899815197296} step=10602
2022-04-20 20:20.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.21 [info     ] CQL_20220420201652: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003534360238683154, 'time_algorithm_update': 0.0179477818528114, 'temp_loss': -0.13497933354337663, 'temp': 0.9735912724196563, 'alpha_loss': 9.895898212466324, 'alpha': 0.39426415034553464, 'critic_loss': 1074.0910667731748, 'actor_loss': 115.44434872008206, 'time_step': 0.018402275983353106, 'td_error': 247.1749237707013, 'init_value': -187.84078979492188, 'ave_value': -101.62564724888082} step=10944
2022-04-20 20:20.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.28 [info     ] CQL_20220420201652: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00035683035153394555, 'time_algorithm_update': 0.017797614398755525, 'temp_loss': -0.139791776006769, 'temp': 0.9775198490647544, 'alpha_loss': 10.607348033559253, 'alpha': 0.3808159761958652, 'critic_loss': 1092.2679168522707, 'actor_loss': 116.74262088084082, 'time_step': 0.018253024558574832, 'td_error': 256.8407860015548, 'init_value': -182.27603149414062, 'ave_value': -102.4144962073683} step=11286
2022-04-20 20:20.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.34 [info     ] CQL_20220420201652: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003490050633748372, 'time_algorithm_update': 0.0178992560035304, 'temp_loss': -0.1802487897779248, 'temp': 0.9815125575191096, 'alpha_loss': 9.89183365113554, 'alpha': 0.36790172990999725, 'critic_loss': 1109.209790681538, 'actor_loss': 117.719650179322, 'time_step': 0.018351027840062192, 'td_error': 231.6970183485344, 'init_value': -188.80355834960938, 'ave_value': -102.8410458061334} step=11628
2022-04-20 20:20.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.41 [info     ] CQL_20220420201652: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003569711718642921, 'time_algorithm_update': 0.017858617487009506, 'temp_loss': -0.05303566082658475, 'temp': 0.9850856832593505, 'alpha_loss': 10.076238607105456, 'alpha': 0.3558658612401862, 'critic_loss': 1129.148245114332, 'actor_loss': 118.74073336417214, 'time_step': 0.018315983097455656, 'td_error': 382.3381357137343, 'init_value': -190.88145446777344, 'ave_value': -104.14772942794504} step=11970
2022-04-20 20:20.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.47 [info     ] CQL_20220420201652: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00035017484809920103, 'time_algorithm_update': 0.0179992161299053, 'temp_loss': -0.10145814079595239, 'temp': 0.9872437659759967, 'alpha_loss': 10.73315407658181, 'alpha': 0.3435096919362308, 'critic_loss': 1144.0535069515831, 'actor_loss': 119.4634644803945, 'time_step': 0.01844704290579634, 'td_error': 340.4012488569708, 'init_value': -185.35105895996094, 'ave_value': -104.71417338207935} step=12312
2022-04-20 20:20.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:20.54 [info     ] CQL_20220420201652: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003526831230922052, 'time_algorithm_update': 0.017901625549584105, 'temp_loss': -0.04692953112072241, 'temp': 0.9889254439295384, 'alpha_loss': 8.909360613739281, 'alpha': 0.3324578959516615, 'critic_loss': 1163.5565315826595, 'actor_loss': 120.3993981344658, 'time_step': 0.018353269114131817, 'td_error': 208.35459771495724, 'init_value': -189.85958862304688, 'ave_value': -105.46351312344743} step=12654
2022-04-20 20:20.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.00 [info     ] CQL_20220420201652: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00035470828675387197, 'time_algorithm_update': 0.01791436002965559, 'temp_loss': -0.018669917389794052, 'temp': 0.990189125140508, 'alpha_loss': 7.64397364610817, 'alpha': 0.3231718236068536, 'critic_loss': 1176.0510027255232, 'actor_loss': 120.86240806356508, 'time_step': 0.018368145875763475, 'td_error': 220.45161580163372, 'init_value': -186.564208984375, 'ave_value': -105.41574881237604} step=12996
2022-04-20 20:21.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.07 [info     ] CQL_20220420201652: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0003566323665150425, 'time_algorithm_update': 0.01801635159386529, 'temp_loss': 0.008665489120005865, 'temp': 0.9907900321553325, 'alpha_loss': 8.008749327464411, 'alpha': 0.3144083930212155, 'critic_loss': 1181.3107430084408, 'actor_loss': 121.30298235263044, 'time_step': 0.0184702057587473, 'td_error': 231.0667005629699, 'init_value': -186.47848510742188, 'ave_value': -105.61825861487705} step=13338
2022-04-20 20:21.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.13 [info     ] CQL_20220420201652: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00033087270301685, 'time_algorithm_update': 0.01685167962347555, 'temp_loss': -0.006061204954197532, 'temp': 0.9900625513659583, 'alpha_loss': 8.464292749326829, 'alpha': 0.3049075702826182, 'critic_loss': 1190.7155768857365, 'actor_loss': 121.79144746657701, 'time_step': 0.01727582139578479, 'td_error': 262.4396607048018, 'init_value': -187.21707153320312, 'ave_value': -106.51978267447805} step=13680
2022-04-20 20:21.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.19 [info     ] CQL_20220420201652: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00035133626725938584, 'time_algorithm_update': 0.017606672487760846, 'temp_loss': 0.037097344528992625, 'temp': 0.9897355780615444, 'alpha_loss': 8.556558868341279, 'alpha': 0.2949282814536178, 'critic_loss': 1198.6493471379865, 'actor_loss': 122.25783976058514, 'time_step': 0.018057480890151353, 'td_error': 276.7005891135313, 'init_value': -196.54452514648438, 'ave_value': -111.71093760938258} step=14022
2022-04-20 20:21.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.26 [info     ] CQL_20220420201652: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.0003546427565011365, 'time_algorithm_update': 0.01808415100588436, 'temp_loss': -0.005954814410349082, 'temp': 0.989574967072024, 'alpha_loss': 7.836110090651707, 'alpha': 0.2856927346764949, 'critic_loss': 1209.540312649911, 'actor_loss': 122.56422455547846, 'time_step': 0.018537729804278813, 'td_error': 276.5367723316078, 'init_value': -187.26513671875, 'ave_value': -107.39864585456414} step=14364
2022-04-20 20:21.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.32 [info     ] CQL_20220420201652: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000356753667195638, 'time_algorithm_update': 0.018047884890907688, 'temp_loss': 0.04388671492536863, 'temp': 0.988482690868322, 'alpha_loss': 7.6974474142866525, 'alpha': 0.27710155558864974, 'critic_loss': 1213.4588217930486, 'actor_loss': 122.69426718371653, 'time_step': 0.018504256393477234, 'td_error': 243.2567995841047, 'init_value': -188.40797424316406, 'ave_value': -107.2640462033313} step=14706
2022-04-20 20:21.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.39 [info     ] CQL_20220420201652: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003476400821529634, 'time_algorithm_update': 0.017932822829798648, 'temp_loss': 0.10192074802056042, 'temp': 0.9869520392334252, 'alpha_loss': 7.685150888231066, 'alpha': 0.2685668869325292, 'critic_loss': 1218.602382905302, 'actor_loss': 123.1082491512187, 'time_step': 0.018379137529964337, 'td_error': 286.23290760207954, 'init_value': -190.6205596923828, 'ave_value': -108.59256723609963} step=15048
2022-04-20 20:21.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.46 [info     ] CQL_20220420201652: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.0003502989373011896, 'time_algorithm_update': 0.017965645120854964, 'temp_loss': 0.07153175665032968, 'temp': 0.9839872867740386, 'alpha_loss': 7.605157075569644, 'alpha': 0.2601746569948587, 'critic_loss': 1222.3755209404126, 'actor_loss': 123.25903679474055, 'time_step': 0.018415264916001706, 'td_error': 230.88548628752108, 'init_value': -188.2724151611328, 'ave_value': -109.67132672915017} step=15390
2022-04-20 20:21.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.52 [info     ] CQL_20220420201652: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.000351740602861371, 'time_algorithm_update': 0.01797071046996535, 'temp_loss': 0.12375855530824578, 'temp': 0.9810831715837557, 'alpha_loss': 6.92668454549466, 'alpha': 0.25218147483834047, 'critic_loss': 1226.618714271233, 'actor_loss': 123.40874813592922, 'time_step': 0.01842379291155185, 'td_error': 213.25491853625255, 'init_value': -187.9426727294922, 'ave_value': -108.37508603383291} step=15732
2022-04-20 20:21.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:21.59 [info     ] CQL_20220420201652: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003559449959916678, 'time_algorithm_update': 0.018193774056016354, 'temp_loss': 0.17959761086916715, 'temp': 0.9767828763228411, 'alpha_loss': 6.395946263569837, 'alpha': 0.24504765467337, 'critic_loss': 1232.3523532800507, 'actor_loss': 123.61484902365166, 'time_step': 0.0186487094700685, 'td_error': 314.22654495249395, 'init_value': -189.1018524169922, 'ave_value': -109.07119037034141} step=16074
2022-04-20 20:21.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:22.05 [info     ] CQL_20220420201652: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003635039803577445, 'time_algorithm_update': 0.01833014808900175, 'temp_loss': 0.14819016476973282, 'temp': 0.9724999711527462, 'alpha_loss': 6.199325602654128, 'alpha': 0.23807604018359158, 'critic_loss': 1241.8306206597222, 'actor_loss': 123.87949224103961, 'time_step': 0.018794181751228912, 'td_error': 207.9641511965476, 'init_value': -183.3549041748047, 'ave_value': -108.1150618680939} step=16416
2022-04-20 20:22.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:22.12 [info     ] CQL_20220420201652: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.0003602748725846497, 'time_algorithm_update': 0.018422397256594652, 'temp_loss': 0.2379637552174734, 'temp': 0.9673374474745745, 'alpha_loss': 5.955894178814358, 'alpha': 0.23112923525578793, 'critic_loss': 1250.6634350157622, 'actor_loss': 124.37067665412413, 'time_step': 0.0188852011808875, 'td_error': 194.17705454392058, 'init_value': -186.2876434326172, 'ave_value': -108.71729128587052} step=16758
2022-04-20 20:22.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:22.19 [info     ] CQL_20220420201652: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00034959971556189465, 'time_algorithm_update': 0.01841584144279971, 'temp_loss': 0.16595311740642055, 'temp': 0.9618011106524551, 'alpha_loss': 5.666993679358945, 'alpha': 0.22458489809991322, 'critic_loss': 1250.1412239297788, 'actor_loss': 124.20760421306767, 'time_step': 0.018862055059064898, 'td_error': 271.32973134350664, 'init_value': -182.53173828125, 'ave_value': -107.90863564016851} step=17100
2022-04-20 20:22.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420201652/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:22.19 [info     ] FQE_20220420202219: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.0001411559218067234, 'time_algorithm_update': 0.002037572321918725, 'loss': 0.007177824801686457, 'time_step': 0.002241530660855568, 'init_value': 0.04055343195796013, 'ave_value': 0.09123258857285833, 'soft_opc': nan} step=177




2022-04-20 20:22.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.20 [info     ] FQE_20220420202219: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00013365718604481153, 'time_algorithm_update': 0.0019200090634620796, 'loss': 0.005685950439991587, 'time_step': 0.0021159406435691704, 'init_value': -0.14973700046539307, 'ave_value': -0.019749380965848586, 'soft_opc': nan} step=354




2022-04-20 20:22.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.20 [info     ] FQE_20220420202219: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00012822878562797934, 'time_algorithm_update': 0.0018277976472499007, 'loss': 0.00535456846352393, 'time_step': 0.002013471840465136, 'init_value': -0.2217106968164444, 'ave_value': -0.04414408836099837, 'soft_opc': nan} step=531




2022-04-20 20:22.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.21 [info     ] FQE_20220420202219: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00013035434787556275, 'time_algorithm_update': 0.0018294544543250133, 'loss': 0.005096933351945406, 'time_step': 0.0020219942944198007, 'init_value': -0.33537352085113525, 'ave_value': -0.10681363038785822, 'soft_opc': nan} step=708




2022-04-20 20:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.21 [info     ] FQE_20220420202219: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00012928752575890493, 'time_algorithm_update': 0.0018512542638401527, 'loss': 0.004905525409474464, 'time_step': 0.0020384519113659186, 'init_value': -0.40252402424812317, 'ave_value': -0.13782715140408403, 'soft_opc': nan} step=885




2022-04-20 20:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.21 [info     ] FQE_20220420202219: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00013298638122903424, 'time_algorithm_update': 0.0018332193126786227, 'loss': 0.004827699866040615, 'time_step': 0.002024152184610313, 'init_value': -0.484866738319397, 'ave_value': -0.19480738439135722, 'soft_opc': nan} step=1062




2022-04-20 20:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.22 [info     ] FQE_20220420202219: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00013091200489108847, 'time_algorithm_update': 0.0018916103125965528, 'loss': 0.00459581644190107, 'time_step': 0.002081126143029854, 'init_value': -0.5480644106864929, 'ave_value': -0.22131599694624676, 'soft_opc': nan} step=1239




2022-04-20 20:22.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.22 [info     ] FQE_20220420202219: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.0001324893391065005, 'time_algorithm_update': 0.0019212685062386895, 'loss': 0.004347884542135112, 'time_step': 0.002116451155667925, 'init_value': -0.5666981339454651, 'ave_value': -0.22265742070547812, 'soft_opc': nan} step=1416




2022-04-20 20:22.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.23 [info     ] FQE_20220420202219: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00012934275266140869, 'time_algorithm_update': 0.0018230265816726254, 'loss': 0.0041590766826023655, 'time_step': 0.0020230786275055447, 'init_value': -0.6396949887275696, 'ave_value': -0.2843936877185339, 'soft_opc': nan} step=1593




2022-04-20 20:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.23 [info     ] FQE_20220420202219: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00012735997216176178, 'time_algorithm_update': 0.0018211084570588364, 'loss': 0.00409821126712644, 'time_step': 0.0020049992254224875, 'init_value': -0.7169654369354248, 'ave_value': -0.3247072659835622, 'soft_opc': nan} step=1770




2022-04-20 20:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.23 [info     ] FQE_20220420202219: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00013634040530792065, 'time_algorithm_update': 0.0018973485224664547, 'loss': 0.0040303214576810934, 'time_step': 0.0020914818607481187, 'init_value': -0.8007161021232605, 'ave_value': -0.3975104556539216, 'soft_opc': nan} step=1947




2022-04-20 20:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.24 [info     ] FQE_20220420202219: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00013919738726427325, 'time_algorithm_update': 0.0019253108461024398, 'loss': 0.003975527868132597, 'time_step': 0.0021257616032314837, 'init_value': -0.9220018982887268, 'ave_value': -0.4985957992349838, 'soft_opc': nan} step=2124




2022-04-20 20:22.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.24 [info     ] FQE_20220420202219: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00013012266428457143, 'time_algorithm_update': 0.0018481507813189663, 'loss': 0.003939881192725468, 'time_step': 0.0020352999369303384, 'init_value': -0.9385278224945068, 'ave_value': -0.5111699111714915, 'soft_opc': nan} step=2301




2022-04-20 20:22.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.25 [info     ] FQE_20220420202219: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.0001390249715686518, 'time_algorithm_update': 0.0019429659439345538, 'loss': 0.004025109124759286, 'time_step': 0.0021515485257078697, 'init_value': -0.9776727557182312, 'ave_value': -0.5431539935862517, 'soft_opc': nan} step=2478




2022-04-20 20:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.25 [info     ] FQE_20220420202219: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00013432529686534472, 'time_algorithm_update': 0.0019540934912902486, 'loss': 0.004116041710110918, 'time_step': 0.0021532794176521946, 'init_value': -1.0974632501602173, 'ave_value': -0.6488073302953092, 'soft_opc': nan} step=2655




2022-04-20 20:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.25 [info     ] FQE_20220420202219: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00013956915860795704, 'time_algorithm_update': 0.0020035889189122086, 'loss': 0.004111484196273831, 'time_step': 0.0022097159240205408, 'init_value': -1.1631112098693848, 'ave_value': -0.6773174309918473, 'soft_opc': nan} step=2832




2022-04-20 20:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.26 [info     ] FQE_20220420202219: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001429379996606859, 'time_algorithm_update': 0.002087706226413533, 'loss': 0.00424021746638352, 'time_step': 0.0023009628899353374, 'init_value': -1.2798155546188354, 'ave_value': -0.8142815748761008, 'soft_opc': nan} step=3009




2022-04-20 20:22.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.26 [info     ] FQE_20220420202219: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00014825055828202242, 'time_algorithm_update': 0.0020740139955854687, 'loss': 0.004719807640096899, 'time_step': 0.0022915648875263452, 'init_value': -1.3573654890060425, 'ave_value': -0.8490972919909804, 'soft_opc': nan} step=3186




2022-04-20 20:22.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.27 [info     ] FQE_20220420202219: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00014213923007081456, 'time_algorithm_update': 0.0019876674070196635, 'loss': 0.005274712649702837, 'time_step': 0.002196604249167577, 'init_value': -1.511623740196228, 'ave_value': -0.9596347993200606, 'soft_opc': nan} step=3363




2022-04-20 20:22.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.27 [info     ] FQE_20220420202219: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00015071421693273857, 'time_algorithm_update': 0.002111946795619814, 'loss': 0.005764734759955735, 'time_step': 0.002329353558815132, 'init_value': -1.63006591796875, 'ave_value': -1.0639872024009178, 'soft_opc': nan} step=3540




2022-04-20 20:22.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.28 [info     ] FQE_20220420202219: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00014646443943519376, 'time_algorithm_update': 0.002042565642103637, 'loss': 0.005953495100003767, 'time_step': 0.002254072555714408, 'init_value': -1.693017840385437, 'ave_value': -1.1262368584717357, 'soft_opc': nan} step=3717




2022-04-20 20:22.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.28 [info     ] FQE_20220420202219: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00014845934291343905, 'time_algorithm_update': 0.0021083880279023767, 'loss': 0.006393599932971226, 'time_step': 0.0023269127991239905, 'init_value': -1.684036135673523, 'ave_value': -1.1073312828893418, 'soft_opc': nan} step=3894




2022-04-20 20:22.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.29 [info     ] FQE_20220420202219: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015220130230747373, 'time_algorithm_update': 0.002214231060049628, 'loss': 0.006836327990532806, 'time_step': 0.002437935037127996, 'init_value': -1.8327436447143555, 'ave_value': -1.204584880007638, 'soft_opc': nan} step=4071




2022-04-20 20:22.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.29 [info     ] FQE_20220420202219: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001532748594122418, 'time_algorithm_update': 0.0020677181287000407, 'loss': 0.007871484957467027, 'time_step': 0.002287118448375982, 'init_value': -1.9773896932601929, 'ave_value': -1.3077011228055209, 'soft_opc': nan} step=4248




2022-04-20 20:22.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.30 [info     ] FQE_20220420202219: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015310244371662032, 'time_algorithm_update': 0.002113404247046864, 'loss': 0.008337558143230228, 'time_step': 0.0023316488427630924, 'init_value': -2.1145012378692627, 'ave_value': -1.422752268249924, 'soft_opc': nan} step=4425




2022-04-20 20:22.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.30 [info     ] FQE_20220420202219: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001469978504935227, 'time_algorithm_update': 0.0021122902800134345, 'loss': 0.008831908652101154, 'time_step': 0.0023256452743616483, 'init_value': -2.2102911472320557, 'ave_value': -1.4955602873284537, 'soft_opc': nan} step=4602




2022-04-20 20:22.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.30 [info     ] FQE_20220420202219: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001527427953515349, 'time_algorithm_update': 0.002147040124666893, 'loss': 0.009647945991827774, 'time_step': 0.0023732050664007328, 'init_value': -2.3282458782196045, 'ave_value': -1.582828716079036, 'soft_opc': nan} step=4779




2022-04-20 20:22.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.31 [info     ] FQE_20220420202219: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.0001490627978481142, 'time_algorithm_update': 0.00212285747635836, 'loss': 0.010349258377300944, 'time_step': 0.002340143009767694, 'init_value': -2.3691916465759277, 'ave_value': -1.6019571486715083, 'soft_opc': nan} step=4956




2022-04-20 20:22.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.31 [info     ] FQE_20220420202219: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.0001546716959463001, 'time_algorithm_update': 0.002118041959859557, 'loss': 0.011228584028033298, 'time_step': 0.0023406656448450467, 'init_value': -2.5350186824798584, 'ave_value': -1.7216681113472214, 'soft_opc': nan} step=5133




2022-04-20 20:22.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.32 [info     ] FQE_20220420202219: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015006631107653603, 'time_algorithm_update': 0.002112618947433213, 'loss': 0.01145638804027737, 'time_step': 0.002334663423441224, 'init_value': -2.5877609252929688, 'ave_value': -1.7799202969929835, 'soft_opc': nan} step=5310




2022-04-20 20:22.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.32 [info     ] FQE_20220420202219: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001489967949646341, 'time_algorithm_update': 0.002067349051351601, 'loss': 0.012208610091695172, 'time_step': 0.002286304861812268, 'init_value': -2.6902999877929688, 'ave_value': -1.8587934118670386, 'soft_opc': nan} step=5487




2022-04-20 20:22.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.33 [info     ] FQE_20220420202219: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00015337992522676113, 'time_algorithm_update': 0.0021136776875641384, 'loss': 0.013462094193466513, 'time_step': 0.002336013115058511, 'init_value': -2.8178443908691406, 'ave_value': -1.965899828496996, 'soft_opc': nan} step=5664




2022-04-20 20:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.33 [info     ] FQE_20220420202219: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00015713266060177216, 'time_algorithm_update': 0.0021096164897336797, 'loss': 0.01413269763573015, 'time_step': 0.0023364751352428718, 'init_value': -2.8519270420074463, 'ave_value': -2.01877838783078, 'soft_opc': nan} step=5841




2022-04-20 20:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.34 [info     ] FQE_20220420202219: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00014732651791330112, 'time_algorithm_update': 0.002153633678026792, 'loss': 0.015305408968532322, 'time_step': 0.0023725423435706876, 'init_value': -2.989438772201538, 'ave_value': -2.1409487973462356, 'soft_opc': nan} step=6018




2022-04-20 20:22.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.34 [info     ] FQE_20220420202219: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001496056378897974, 'time_algorithm_update': 0.0021968386267538125, 'loss': 0.016037610628494813, 'time_step': 0.0024161109816556595, 'init_value': -3.0530920028686523, 'ave_value': -2.1814750275633354, 'soft_opc': nan} step=6195




2022-04-20 20:22.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.35 [info     ] FQE_20220420202219: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015113986818130406, 'time_algorithm_update': 0.00214764492659919, 'loss': 0.016368425897148958, 'time_step': 0.0023704033113468837, 'init_value': -3.0957911014556885, 'ave_value': -2.2661396365534436, 'soft_opc': nan} step=6372




2022-04-20 20:22.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.35 [info     ] FQE_20220420202219: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00015040171348442466, 'time_algorithm_update': 0.002141706014083604, 'loss': 0.01683180629983439, 'time_step': 0.0023637397141106384, 'init_value': -3.255547523498535, 'ave_value': -2.4225325505833726, 'soft_opc': nan} step=6549




2022-04-20 20:22.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.35 [info     ] FQE_20220420202219: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00014838391104660464, 'time_algorithm_update': 0.0020923291222523835, 'loss': 0.01782501113632599, 'time_step': 0.0023113307306322, 'init_value': -3.2825212478637695, 'ave_value': -2.4633627246986998, 'soft_opc': nan} step=6726




2022-04-20 20:22.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.36 [info     ] FQE_20220420202219: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00015318999856205309, 'time_algorithm_update': 0.0021355232949984274, 'loss': 0.018838929929270668, 'time_step': 0.002357319923443983, 'init_value': -3.275747299194336, 'ave_value': -2.4126378047036696, 'soft_opc': nan} step=6903




2022-04-20 20:22.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.36 [info     ] FQE_20220420202219: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00015007035206940215, 'time_algorithm_update': 0.002141684462121651, 'loss': 0.019485597559224584, 'time_step': 0.002363498601536293, 'init_value': -3.354998826980591, 'ave_value': -2.4782941611679465, 'soft_opc': nan} step=7080




2022-04-20 20:22.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.37 [info     ] FQE_20220420202219: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00015055257721809345, 'time_algorithm_update': 0.0021061951157736914, 'loss': 0.017494330371435374, 'time_step': 0.0023252479100631454, 'init_value': -3.4130218029022217, 'ave_value': -2.54571534747834, 'soft_opc': nan} step=7257




2022-04-20 20:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.37 [info     ] FQE_20220420202219: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00015343784612450897, 'time_algorithm_update': 0.002151512156772075, 'loss': 0.020976736229360315, 'time_step': 0.002373840849278337, 'init_value': -3.467243194580078, 'ave_value': -2.5947324167948227, 'soft_opc': nan} step=7434




2022-04-20 20:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.38 [info     ] FQE_20220420202219: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00015099035144525733, 'time_algorithm_update': 0.0021080970764160156, 'loss': 0.021819711145865967, 'time_step': 0.0023264817598849367, 'init_value': -3.5702226161956787, 'ave_value': -2.717990753886936, 'soft_opc': nan} step=7611




2022-04-20 20:22.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.38 [info     ] FQE_20220420202219: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00015352540096994174, 'time_algorithm_update': 0.0021442787795417054, 'loss': 0.021829102376454884, 'time_step': 0.0023726905133091124, 'init_value': -3.5771195888519287, 'ave_value': -2.7113513561071936, 'soft_opc': nan} step=7788




2022-04-20 20:22.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.39 [info     ] FQE_20220420202219: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00015299064291399077, 'time_algorithm_update': 0.0020801872856872905, 'loss': 0.023133446949160537, 'time_step': 0.002304108129382807, 'init_value': -3.6965625286102295, 'ave_value': -2.845561813932281, 'soft_opc': nan} step=7965




2022-04-20 20:22.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.39 [info     ] FQE_20220420202219: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00014706654737224687, 'time_algorithm_update': 0.0020848209575071172, 'loss': 0.02464599742904032, 'time_step': 0.002295688047247418, 'init_value': -3.898745536804199, 'ave_value': -3.042009208152244, 'soft_opc': nan} step=8142




2022-04-20 20:22.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.40 [info     ] FQE_20220420202219: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.000152272693181442, 'time_algorithm_update': 0.0020812918237373655, 'loss': 0.025681092251239525, 'time_step': 0.0023000779124976553, 'init_value': -3.822376251220703, 'ave_value': -2.973267036040982, 'soft_opc': nan} step=8319




2022-04-20 20:22.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.40 [info     ] FQE_20220420202219: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00015151029252736582, 'time_algorithm_update': 0.002157095461915442, 'loss': 0.026344297651075207, 'time_step': 0.002377663628529694, 'init_value': -3.8696529865264893, 'ave_value': -2.9878366004328827, 'soft_opc': nan} step=8496




2022-04-20 20:22.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.41 [info     ] FQE_20220420202219: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015122472903149277, 'time_algorithm_update': 0.002133622681353725, 'loss': 0.02666143093410732, 'time_step': 0.0023518888290319064, 'init_value': -4.048717975616455, 'ave_value': -3.1213756560384334, 'soft_opc': nan} step=8673




2022-04-20 20:22.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-20 20:22.41 [info     ] FQE_20220420202219: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.000149466897134727, 'time_algorithm_update': 0.002125986551834365, 'loss': 0.029015530801199292, 'time_step': 0.0023458394627113126, 'init_value': -4.147578716278076, 'ave_value': -3.290516939392319, 'soft_opc': nan} step=8850




2022-04-20 20:22.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202219/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:22.41 [info     ] Directory is created at d3rlpy_logs/FQE_20220420202241
2022-04-20 20:22.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:22.41 [debug    ] Building models...
2022-04-20 20:22.41 [debug    ] Models have been built.
2022-04-20 20:22.41 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420202241/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:22.42 [info     ] FQE_20220420202241: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001513084699941236, 'time_algorithm_update': 0.002077211474263391, 'loss': 0.026113233501957947, 'time_step': 0.002297341823577881, 'init_value': -1.2428081035614014, 'ave_value': -1.228719220263464, 'soft_opc': nan} step=344




2022-04-20 20:22.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.43 [info     ] FQE_20220420202241: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001514325308245282, 'time_algorithm_update': 0.002113916153131529, 'loss': 0.023457582208847756, 'time_step': 0.0023368770299955857, 'init_value': -2.019420623779297, 'ave_value': -1.9852533129018706, 'soft_opc': nan} step=688




2022-04-20 20:22.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.44 [info     ] FQE_20220420202241: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015134658924368925, 'time_algorithm_update': 0.0021474486173585403, 'loss': 0.02598723452700692, 'time_step': 0.0023710741553195688, 'init_value': -2.9374427795410156, 'ave_value': -2.8925519535804654, 'soft_opc': nan} step=1032




2022-04-20 20:22.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.45 [info     ] FQE_20220420202241: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015452573465746502, 'time_algorithm_update': 0.0020937316639478816, 'loss': 0.030178214358382447, 'time_step': 0.002317769582881484, 'init_value': -3.5935702323913574, 'ave_value': -3.53615642608823, 'soft_opc': nan} step=1376




2022-04-20 20:22.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.46 [info     ] FQE_20220420202241: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015399899593619414, 'time_algorithm_update': 0.002092921456625295, 'loss': 0.03884410691653313, 'time_step': 0.002320593179658402, 'init_value': -4.441281318664551, 'ave_value': -4.40252956868829, 'soft_opc': nan} step=1720




2022-04-20 20:22.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.47 [info     ] FQE_20220420202241: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015651902487111646, 'time_algorithm_update': 0.002121057621268339, 'loss': 0.04691257505396078, 'time_step': 0.0023491590522056404, 'init_value': -4.810672283172607, 'ave_value': -4.780400244676852, 'soft_opc': nan} step=2064




2022-04-20 20:22.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.47 [info     ] FQE_20220420202241: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015438226766364518, 'time_algorithm_update': 0.002173399509385575, 'loss': 0.055235981803149155, 'time_step': 0.00239669029102769, 'init_value': -5.569993495941162, 'ave_value': -5.580355616330026, 'soft_opc': nan} step=2408




2022-04-20 20:22.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.48 [info     ] FQE_20220420202241: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015432058378707532, 'time_algorithm_update': 0.002101277889207352, 'loss': 0.0664643606138524, 'time_step': 0.002327634151591811, 'init_value': -5.918638229370117, 'ave_value': -5.956306840049791, 'soft_opc': nan} step=2752




2022-04-20 20:22.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.49 [info     ] FQE_20220420202241: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015266482220139613, 'time_algorithm_update': 0.0021186159100643424, 'loss': 0.07619763056336101, 'time_step': 0.0023391711157421734, 'init_value': -6.263476371765137, 'ave_value': -6.355999280821096, 'soft_opc': nan} step=3096




2022-04-20 20:22.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.50 [info     ] FQE_20220420202241: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015024251716081485, 'time_algorithm_update': 0.0021012674930483794, 'loss': 0.09200831956154298, 'time_step': 0.0023204185241876645, 'init_value': -6.963554859161377, 'ave_value': -7.065016801131738, 'soft_opc': nan} step=3440




2022-04-20 20:22.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.51 [info     ] FQE_20220420202241: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001514949077783629, 'time_algorithm_update': 0.002069540495096251, 'loss': 0.10370359875716616, 'time_step': 0.002291502647621687, 'init_value': -7.243402481079102, 'ave_value': -7.39289860025988, 'soft_opc': nan} step=3784




2022-04-20 20:22.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.52 [info     ] FQE_20220420202241: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015801260637682537, 'time_algorithm_update': 0.002077986334645471, 'loss': 0.12311432669805579, 'time_step': 0.002306737178979918, 'init_value': -7.803576946258545, 'ave_value': -8.00001726674067, 'soft_opc': nan} step=4128




2022-04-20 20:22.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.53 [info     ] FQE_20220420202241: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015163352323132893, 'time_algorithm_update': 0.002109981553499089, 'loss': 0.13856384693103474, 'time_step': 0.002330629631530407, 'init_value': -7.93256139755249, 'ave_value': -8.201343779205471, 'soft_opc': nan} step=4472




2022-04-20 20:22.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.54 [info     ] FQE_20220420202241: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015539623970209167, 'time_algorithm_update': 0.0020876499109489972, 'loss': 0.15789110069488024, 'time_step': 0.002314189838808636, 'init_value': -8.429108619689941, 'ave_value': -8.739220155893012, 'soft_opc': nan} step=4816




2022-04-20 20:22.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.54 [info     ] FQE_20220420202241: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015082886052686115, 'time_algorithm_update': 0.002017929803493411, 'loss': 0.18232640300249292, 'time_step': 0.0022354597269102585, 'init_value': -8.710131645202637, 'ave_value': -9.088527421012252, 'soft_opc': nan} step=5160




2022-04-20 20:22.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.55 [info     ] FQE_20220420202241: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015484108481296274, 'time_algorithm_update': 0.002105432194332744, 'loss': 0.20516154432106157, 'time_step': 0.002330678146938945, 'init_value': -9.469379425048828, 'ave_value': -9.966425821067649, 'soft_opc': nan} step=5504




2022-04-20 20:22.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.56 [info     ] FQE_20220420202241: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015105688294699026, 'time_algorithm_update': 0.0020814025124838184, 'loss': 0.23392705257635477, 'time_step': 0.002301262561665025, 'init_value': -9.4733247756958, 'ave_value': -10.031184662822717, 'soft_opc': nan} step=5848




2022-04-20 20:22.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.57 [info     ] FQE_20220420202241: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015120589455892874, 'time_algorithm_update': 0.0020819950935452485, 'loss': 0.2505981923340885, 'time_step': 0.0023027665393297063, 'init_value': -9.68376350402832, 'ave_value': -10.345150247999879, 'soft_opc': nan} step=6192




2022-04-20 20:22.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.58 [info     ] FQE_20220420202241: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015703328820162042, 'time_algorithm_update': 0.0021215649538261945, 'loss': 0.28612101700720055, 'time_step': 0.002350949963857961, 'init_value': -10.11951732635498, 'ave_value': -10.804159696031832, 'soft_opc': nan} step=6536




2022-04-20 20:22.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:22.59 [info     ] FQE_20220420202241: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015492217485294787, 'time_algorithm_update': 0.0020462420097617216, 'loss': 0.3100173432477425, 'time_step': 0.002269169618917066, 'init_value': -10.581544876098633, 'ave_value': -11.398872204811672, 'soft_opc': nan} step=6880




2022-04-20 20:22.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.00 [info     ] FQE_20220420202241: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015489237253056017, 'time_algorithm_update': 0.002121068710504576, 'loss': 0.33635835549360965, 'time_step': 0.002346189216125843, 'init_value': -11.013425827026367, 'ave_value': -11.816868771920568, 'soft_opc': nan} step=7224




2022-04-20 20:23.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.00 [info     ] FQE_20220420202241: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015565752983093262, 'time_algorithm_update': 0.0020900749882986377, 'loss': 0.3572414450423229, 'time_step': 0.002317162447197493, 'init_value': -11.078207015991211, 'ave_value': -12.082054348639003, 'soft_opc': nan} step=7568




2022-04-20 20:23.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.01 [info     ] FQE_20220420202241: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015370513117590615, 'time_algorithm_update': 0.0021091138207635215, 'loss': 0.386774587867305, 'time_step': 0.002335251070732294, 'init_value': -11.506763458251953, 'ave_value': -12.585034021886216, 'soft_opc': nan} step=7912




2022-04-20 20:23.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.02 [info     ] FQE_20220420202241: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001557005006213521, 'time_algorithm_update': 0.0020733953908432363, 'loss': 0.41229495089934315, 'time_step': 0.0023023603960525157, 'init_value': -11.48516845703125, 'ave_value': -12.698037885998685, 'soft_opc': nan} step=8256




2022-04-20 20:23.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.03 [info     ] FQE_20220420202241: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015486742174902627, 'time_algorithm_update': 0.002086263063342072, 'loss': 0.427930947198251, 'time_step': 0.0023097756297089335, 'init_value': -11.81627082824707, 'ave_value': -13.064722200135726, 'soft_opc': nan} step=8600




2022-04-20 20:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.04 [info     ] FQE_20220420202241: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015453058619831884, 'time_algorithm_update': 0.0020736649978992552, 'loss': 0.44859413074892623, 'time_step': 0.002300958993823029, 'init_value': -11.81113052368164, 'ave_value': -13.182533728975702, 'soft_opc': nan} step=8944




2022-04-20 20:23.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.05 [info     ] FQE_20220420202241: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015424157297888466, 'time_algorithm_update': 0.0020887664584226385, 'loss': 0.45466338146239693, 'time_step': 0.0023155926271926524, 'init_value': -11.827438354492188, 'ave_value': -13.2519772662412, 'soft_opc': nan} step=9288




2022-04-20 20:23.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.06 [info     ] FQE_20220420202241: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015170976173046024, 'time_algorithm_update': 0.0020743726297866466, 'loss': 0.4690480116094181, 'time_step': 0.0022987591665844585, 'init_value': -12.206933975219727, 'ave_value': -13.706070764596062, 'soft_opc': nan} step=9632




2022-04-20 20:23.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.07 [info     ] FQE_20220420202241: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015047192573547363, 'time_algorithm_update': 0.0020224403503329253, 'loss': 0.4830636044985853, 'time_step': 0.002244789239972137, 'init_value': -12.254470825195312, 'ave_value': -13.839797087503715, 'soft_opc': nan} step=9976




2022-04-20 20:23.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.07 [info     ] FQE_20220420202241: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015469138012375941, 'time_algorithm_update': 0.0020222490610078323, 'loss': 0.48488145730414883, 'time_step': 0.002247925414595493, 'init_value': -12.056589126586914, 'ave_value': -13.726722306590359, 'soft_opc': nan} step=10320




2022-04-20 20:23.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.08 [info     ] FQE_20220420202241: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015600060307702354, 'time_algorithm_update': 0.0021047522855359452, 'loss': 0.4828379196114838, 'time_step': 0.0023326104463532913, 'init_value': -12.119226455688477, 'ave_value': -13.897231722905024, 'soft_opc': nan} step=10664




2022-04-20 20:23.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.09 [info     ] FQE_20220420202241: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015636169633200003, 'time_algorithm_update': 0.00210112263990003, 'loss': 0.4830703757335107, 'time_step': 0.0023280853448912156, 'init_value': -12.3271484375, 'ave_value': -14.210823386639088, 'soft_opc': nan} step=11008




2022-04-20 20:23.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.10 [info     ] FQE_20220420202241: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015668882880099985, 'time_algorithm_update': 0.002086398906485979, 'loss': 0.4973976152927376, 'time_step': 0.0023108713848646297, 'init_value': -12.294981002807617, 'ave_value': -14.247802921404709, 'soft_opc': nan} step=11352




2022-04-20 20:23.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.11 [info     ] FQE_20220420202241: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001581027064212533, 'time_algorithm_update': 0.0021514303462449895, 'loss': 0.5089238662445961, 'time_step': 0.002383597368417784, 'init_value': -12.331938743591309, 'ave_value': -14.396290938354827, 'soft_opc': nan} step=11696




2022-04-20 20:23.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.12 [info     ] FQE_20220420202241: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001541916714158169, 'time_algorithm_update': 0.0021012439284213754, 'loss': 0.5096277600847358, 'time_step': 0.0023277512816495672, 'init_value': -12.76604175567627, 'ave_value': -14.92749169536002, 'soft_opc': nan} step=12040




2022-04-20 20:23.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.13 [info     ] FQE_20220420202241: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015778666318849076, 'time_algorithm_update': 0.0020933726499246996, 'loss': 0.5144148178961735, 'time_step': 0.002324708672456963, 'init_value': -12.899946212768555, 'ave_value': -14.971394957400657, 'soft_opc': nan} step=12384




2022-04-20 20:23.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.14 [info     ] FQE_20220420202241: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015136045078898585, 'time_algorithm_update': 0.002069476038910622, 'loss': 0.5120164045663333, 'time_step': 0.0022927370182303495, 'init_value': -13.171965599060059, 'ave_value': -15.38000737130642, 'soft_opc': nan} step=12728




2022-04-20 20:23.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.14 [info     ] FQE_20220420202241: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015814151874808378, 'time_algorithm_update': 0.0021061599254608154, 'loss': 0.5138343034073883, 'time_step': 0.002336229695830234, 'init_value': -13.485448837280273, 'ave_value': -15.742750216121072, 'soft_opc': nan} step=13072




2022-04-20 20:23.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.15 [info     ] FQE_20220420202241: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015480712402698605, 'time_algorithm_update': 0.0020978284436602925, 'loss': 0.505038678705649, 'time_step': 0.002327197512914968, 'init_value': -13.484024047851562, 'ave_value': -15.803219832520227, 'soft_opc': nan} step=13416




2022-04-20 20:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.16 [info     ] FQE_20220420202241: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016346019367839015, 'time_algorithm_update': 0.002173570006392723, 'loss': 0.5125202518660402, 'time_step': 0.0024087124092634334, 'init_value': -13.410494804382324, 'ave_value': -15.688654140497112, 'soft_opc': nan} step=13760




2022-04-20 20:23.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.17 [info     ] FQE_20220420202241: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015770349391671114, 'time_algorithm_update': 0.002120636923368587, 'loss': 0.5132616236589329, 'time_step': 0.002350291540456373, 'init_value': -13.665617942810059, 'ave_value': -16.024224944426134, 'soft_opc': nan} step=14104




2022-04-20 20:23.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.18 [info     ] FQE_20220420202241: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015818518261576808, 'time_algorithm_update': 0.0021280764147292735, 'loss': 0.5125662135122734, 'time_step': 0.002357731031817059, 'init_value': -14.027576446533203, 'ave_value': -16.39325462786464, 'soft_opc': nan} step=14448




2022-04-20 20:23.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.19 [info     ] FQE_20220420202241: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015289284462152527, 'time_algorithm_update': 0.0020872929761576097, 'loss': 0.5143124530860764, 'time_step': 0.002312294965566591, 'init_value': -14.19208812713623, 'ave_value': -16.539072215610798, 'soft_opc': nan} step=14792




2022-04-20 20:23.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.20 [info     ] FQE_20220420202241: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001562646655149238, 'time_algorithm_update': 0.002094832270644432, 'loss': 0.5141592703023276, 'time_step': 0.002323004395462746, 'init_value': -14.280097961425781, 'ave_value': -16.67425505927017, 'soft_opc': nan} step=15136




2022-04-20 20:23.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.21 [info     ] FQE_20220420202241: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015475375707759412, 'time_algorithm_update': 0.002118071844411451, 'loss': 0.5223099260081991, 'time_step': 0.0023471047711926835, 'init_value': -14.478954315185547, 'ave_value': -16.949171013418617, 'soft_opc': nan} step=15480




2022-04-20 20:23.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.21 [info     ] FQE_20220420202241: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015602624693582224, 'time_algorithm_update': 0.0021104147267896074, 'loss': 0.5278197738212035, 'time_step': 0.002339139927265256, 'init_value': -14.398473739624023, 'ave_value': -17.01735621729681, 'soft_opc': nan} step=15824




2022-04-20 20:23.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.22 [info     ] FQE_20220420202241: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015518693036811296, 'time_algorithm_update': 0.002099719851516014, 'loss': 0.5228617700699414, 'time_step': 0.00232661810032157, 'init_value': -14.392601013183594, 'ave_value': -17.123703911403815, 'soft_opc': nan} step=16168




2022-04-20 20:23.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.23 [info     ] FQE_20220420202241: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001558848591737969, 'time_algorithm_update': 0.00211605568264806, 'loss': 0.5191519232358524, 'time_step': 0.0023463049600290697, 'init_value': -14.157411575317383, 'ave_value': -16.74728856438452, 'soft_opc': nan} step=16512




2022-04-20 20:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.24 [info     ] FQE_20220420202241: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015330245328503986, 'time_algorithm_update': 0.0020807704260182936, 'loss': 0.5175831082417781, 'time_step': 0.0023082030373950337, 'init_value': -14.318531036376953, 'ave_value': -16.971921767227286, 'soft_opc': nan} step=16856




2022-04-20 20:23.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:23.25 [info     ] FQE_20220420202241: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015918113464532898, 'time_algorithm_update': 0.0021211491074672965, 'loss': 0.529070079873511, 'time_step': 0.0023546974326288978, 'init_value': -14.337480545043945, 'ave_value': -17.02315817127625, 'soft_opc': nan} step=17200




2022-04-20 20:23.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202241/model_17200.pt
search iteration:  36
using hyper params:  [0.004214481705412256, 0.002320353189219589, 7.126816718250938e-05, 3]
2022-04-20 20:23.25 [debug    ] RoundIterator is selected.
2022-04-20 20:23.25 [info     ] Directory is created at d3rlpy_logs/CQL_20220420202325
2022-04-20 20:23.25 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:23.25 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:23.25 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420202325/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.004214481705412256, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:23.31 [info     ] CQL_20220420202325: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00033080786989446274, 'time_algorithm_update': 0.017804912656371356, 'temp_loss': 4.669645446782921, 'temp': 0.9880187713611893, 'alpha_loss': -16.315193686568946, 'alpha': 1.0170478737145139, 'critic_loss': 33.722687710098356, 'actor_loss': 0.8352304721082294, 'time_step': 0.01823484130770142, 'td_error': 2.5137139910118753, 'init_value': -3.393031358718872, 'ave_value': -2.3220170301131837} step=342
2022-04-20 20:23.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:23.38 [info     ] CQL_20220420202325: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003266613385830706, 'time_algorithm_update': 0.017776386779651307, 'temp_loss': 4.319645925572044, 'temp': 0.9650016390440757, 'alpha_loss': -10.543894962957728, 'alpha': 1.0465582874086168, 'critic_loss': 19.162646310371265, 'actor_loss': 3.37263244076779, 'time_step': 0.018204861914205273, 'td_error': 3.624310652807938, 'init_value': -9.102799415588379, 'ave_value': -4.99477930932935} step=684
2022-04-20 20:23.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:23.44 [info     ] CQL_20220420202325: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.00033115573793823956, 'time_algorithm_update': 0.017897067014236896, 'temp_loss': 3.7451659566477726, 'temp': 0.9438618377983918, 'alpha_loss': -7.134387454094245, 'alpha': 1.0700988013144823, 'critic_loss': 21.36455682564897, 'actor_loss': 6.6859537711617545, 'time_step': 0.01832830278496993, 'td_error': 4.3784587955113325, 'init_value': -13.62047290802002, 'ave_value': -6.946608590344722} step=1026
2022-04-20 20:23.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:23.51 [info     ] CQL_20220420202325: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003315621649312694, 'time_algorithm_update': 0.01782271527407462, 'temp_loss': 3.2937455247020164, 'temp': 0.924541217541834, 'alpha_loss': -4.893183580616064, 'alpha': 1.0903503765836793, 'critic_loss': 28.787969165378147, 'actor_loss': 10.094110730098702, 'time_step': 0.01825047515289128, 'td_error': 5.61671212004073, 'init_value': -19.040695190429688, 'ave_value': -9.66259100291576} step=1368
2022-04-20 20:23.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:23.57 [info     ] CQL_20220420202325: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003330414755302563, 'time_algorithm_update': 0.01798333131779007, 'temp_loss': 2.8823708916268154, 'temp': 0.9065432987715069, 'alpha_loss': -3.0412367140514807, 'alpha': 1.1068015293768274, 'critic_loss': 39.95893108077914, 'actor_loss': 13.398034179419803, 'time_step': 0.018414759496499223, 'td_error': 7.03322155758277, 'init_value': -24.266056060791016, 'ave_value': -12.391563414933982} step=1710
2022-04-20 20:23.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.04 [info     ] CQL_20220420202325: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.0003279559096397712, 'time_algorithm_update': 0.017909178956907395, 'temp_loss': 2.4966729701611032, 'temp': 0.889838414059745, 'alpha_loss': -1.1245447010093244, 'alpha': 1.1172869341415272, 'critic_loss': 53.32744818124158, 'actor_loss': 16.69050821783947, 'time_step': 0.01833480073694597, 'td_error': 8.716970438456771, 'init_value': -29.7888126373291, 'ave_value': -15.455757656784744} step=2052
2022-04-20 20:24.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.11 [info     ] CQL_20220420202325: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003301672071044208, 'time_algorithm_update': 0.017989958238880537, 'temp_loss': 2.190341698495965, 'temp': 0.8741799101494906, 'alpha_loss': 0.4674294755184119, 'alpha': 1.1192895017172162, 'critic_loss': 68.35085518597162, 'actor_loss': 19.854159438819217, 'time_step': 0.018418682946099177, 'td_error': 11.244903427672947, 'init_value': -34.45854568481445, 'ave_value': -17.160828336527622} step=2394
2022-04-20 20:24.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.17 [info     ] CQL_20220420202325: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00033240290413125915, 'time_algorithm_update': 0.01815388704601087, 'temp_loss': 1.8923820201416461, 'temp': 0.8594481814325902, 'alpha_loss': 1.8749591133323067, 'alpha': 1.110098267856397, 'critic_loss': 83.47077754505894, 'actor_loss': 22.917652174743296, 'time_step': 0.018585107479876246, 'td_error': 13.149603261677816, 'init_value': -38.6988639831543, 'ave_value': -19.597979667789495} step=2736
2022-04-20 20:24.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.24 [info     ] CQL_20220420202325: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003356313147740057, 'time_algorithm_update': 0.018223891481321457, 'temp_loss': 1.6617442693626672, 'temp': 0.845544468763976, 'alpha_loss': 3.0601160289251315, 'alpha': 1.089384499000527, 'critic_loss': 98.40308859752632, 'actor_loss': 25.80082997104578, 'time_step': 0.018662268655341967, 'td_error': 15.595454369946872, 'init_value': -43.723236083984375, 'ave_value': -21.43002681292795} step=3078
2022-04-20 20:24.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.30 [info     ] CQL_20220420202325: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.000331987414443702, 'time_algorithm_update': 0.01801994111802843, 'temp_loss': 1.4510583844449785, 'temp': 0.8321843056650887, 'alpha_loss': 3.9881375315245133, 'alpha': 1.0582027034452783, 'critic_loss': 113.0693057099281, 'actor_loss': 28.53657821744506, 'time_step': 0.01844857310691075, 'td_error': 17.790965750689598, 'init_value': -48.67644500732422, 'ave_value': -24.41209337320548} step=3420
2022-04-20 20:24.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.37 [info     ] CQL_20220420202325: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.00033119477723774155, 'time_algorithm_update': 0.017892007939299646, 'temp_loss': 1.2577090045513466, 'temp': 0.8195306407777887, 'alpha_loss': 4.77625168276112, 'alpha': 1.0209134534785622, 'critic_loss': 128.39627878289474, 'actor_loss': 31.225638278046546, 'time_step': 0.01832334061115109, 'td_error': 20.022424618867618, 'init_value': -53.09456253051758, 'ave_value': -26.95444654561915} step=3762
2022-04-20 20:24.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.43 [info     ] CQL_20220420202325: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003307520994666027, 'time_algorithm_update': 0.0179406718203896, 'temp_loss': 1.0910868998508008, 'temp': 0.8075825596413417, 'alpha_loss': 5.367036380265889, 'alpha': 0.9811155255775006, 'critic_loss': 143.02073642663788, 'actor_loss': 33.74853666205155, 'time_step': 0.01836870985421521, 'td_error': 22.224210388542136, 'init_value': -57.11701583862305, 'ave_value': -28.845539326377818} step=4104
2022-04-20 20:24.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.50 [info     ] CQL_20220420202325: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003301930009273061, 'time_algorithm_update': 0.01787162872782925, 'temp_loss': 0.9300369158474325, 'temp': 0.7961468597253164, 'alpha_loss': 5.8507811360889015, 'alpha': 0.9426242674303333, 'critic_loss': 157.37242487857216, 'actor_loss': 36.16626871120163, 'time_step': 0.01829825925548174, 'td_error': 24.501762849249268, 'init_value': -60.95832443237305, 'ave_value': -31.329139467131448} step=4446
2022-04-20 20:24.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:24.56 [info     ] CQL_20220420202325: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003265951111999869, 'time_algorithm_update': 0.017932150796142934, 'temp_loss': 0.8218221619115238, 'temp': 0.7852281631782041, 'alpha_loss': 6.193144591928226, 'alpha': 0.9053023143818504, 'critic_loss': 173.1408864517658, 'actor_loss': 38.54141958136307, 'time_step': 0.018356525409988493, 'td_error': 25.628372237786387, 'init_value': -64.69224548339844, 'ave_value': -32.69806370281153} step=4788
2022-04-20 20:24.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.03 [info     ] CQL_20220420202325: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00033807545377497085, 'time_algorithm_update': 0.017899600385922438, 'temp_loss': 0.6895351023004767, 'temp': 0.7747692913688414, 'alpha_loss': 6.410478624684072, 'alpha': 0.8707147519490872, 'critic_loss': 188.78254217850534, 'actor_loss': 40.82476749754789, 'time_step': 0.0183366822917559, 'td_error': 28.158256757325333, 'init_value': -68.17623901367188, 'ave_value': -34.81007903867208} step=5130
2022-04-20 20:25.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.09 [info     ] CQL_20220420202325: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003322300158048931, 'time_algorithm_update': 0.017950129090693958, 'temp_loss': 0.6143326154904581, 'temp': 0.7648092694798408, 'alpha_loss': 6.57587094404544, 'alpha': 0.8377594764818225, 'critic_loss': 203.5925026163023, 'actor_loss': 42.974682986387734, 'time_step': 0.018383281969884684, 'td_error': 30.736905840778174, 'init_value': -72.43093872070312, 'ave_value': -36.85965809606217} step=5472
2022-04-20 20:25.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.16 [info     ] CQL_20220420202325: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003321826109412121, 'time_algorithm_update': 0.01798014822061996, 'temp_loss': 0.5098509916175179, 'temp': 0.7552308201441291, 'alpha_loss': 6.741850770704928, 'alpha': 0.8068752553727891, 'critic_loss': 219.0832715843156, 'actor_loss': 45.130483861555135, 'time_step': 0.018410722414652508, 'td_error': 32.74731540338729, 'init_value': -75.27806091308594, 'ave_value': -38.62405788856584} step=5814
2022-04-20 20:25.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.23 [info     ] CQL_20220420202325: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003314283159044054, 'time_algorithm_update': 0.01821233306014747, 'temp_loss': 0.4403423353790017, 'temp': 0.7461442914274004, 'alpha_loss': 6.789044150134973, 'alpha': 0.7777552831242656, 'critic_loss': 233.97674707780806, 'actor_loss': 47.11996686388875, 'time_step': 0.01864167054494222, 'td_error': 34.3812665475845, 'init_value': -80.31111907958984, 'ave_value': -40.220068255418596} step=6156
2022-04-20 20:25.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.29 [info     ] CQL_20220420202325: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0003341861635620831, 'time_algorithm_update': 0.018181461339805558, 'temp_loss': 0.3605116946807905, 'temp': 0.7376804048554939, 'alpha_loss': 6.82239744398329, 'alpha': 0.75025185280376, 'critic_loss': 248.99939170078923, 'actor_loss': 49.18394282826206, 'time_step': 0.01861298990528486, 'td_error': 36.21035676399697, 'init_value': -83.1594009399414, 'ave_value': -41.85671903125875} step=6498
2022-04-20 20:25.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.36 [info     ] CQL_20220420202325: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003333537899262724, 'time_algorithm_update': 0.018354615272834288, 'temp_loss': 0.27924888577769724, 'temp': 0.729733143285004, 'alpha_loss': 6.871500246706065, 'alpha': 0.7241443379929191, 'critic_loss': 264.0026337026853, 'actor_loss': 50.97357969674451, 'time_step': 0.018788907263014052, 'td_error': 38.35083893074887, 'init_value': -85.9619369506836, 'ave_value': -43.624042660116075} step=6840
2022-04-20 20:25.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.42 [info     ] CQL_20220420202325: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00033043141950640765, 'time_algorithm_update': 0.018197896187765558, 'temp_loss': 0.21593016353898753, 'temp': 0.7233468755992533, 'alpha_loss': 6.903534996579265, 'alpha': 0.6991183330789644, 'critic_loss': 278.33787790934247, 'actor_loss': 52.82246549505936, 'time_step': 0.018634125503183108, 'td_error': 40.18749510316558, 'init_value': -90.36689758300781, 'ave_value': -46.177044332379154} step=7182
2022-04-20 20:25.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.49 [info     ] CQL_20220420202325: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00032845226644772534, 'time_algorithm_update': 0.018195935857226276, 'temp_loss': 0.1403031034384206, 'temp': 0.7179565321632296, 'alpha_loss': 6.970121410157946, 'alpha': 0.6749847754400377, 'critic_loss': 291.43564663714136, 'actor_loss': 54.4989790665476, 'time_step': 0.018621799541495697, 'td_error': 42.89859735785815, 'init_value': -92.97856903076172, 'ave_value': -47.4676015268038} step=7524
2022-04-20 20:25.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:25.56 [info     ] CQL_20220420202325: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00033093753613923723, 'time_algorithm_update': 0.018194937566567582, 'temp_loss': 0.10270766359454359, 'temp': 0.7137276247928017, 'alpha_loss': 6.984843068652683, 'alpha': 0.6519517910759351, 'critic_loss': 304.55322301318074, 'actor_loss': 56.17075628983347, 'time_step': 0.018621145633229037, 'td_error': 44.114172710649875, 'init_value': -95.08851623535156, 'ave_value': -48.49218435600534} step=7866
2022-04-20 20:25.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.02 [info     ] CQL_20220420202325: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003364671740615577, 'time_algorithm_update': 0.018314415948432788, 'temp_loss': 0.07907033999510414, 'temp': 0.7104342971977434, 'alpha_loss': 6.8483551705789845, 'alpha': 0.6299209763780672, 'critic_loss': 318.2213315015648, 'actor_loss': 57.76237869262695, 'time_step': 0.0187515780242563, 'td_error': 45.981652515732556, 'init_value': -97.71045684814453, 'ave_value': -49.75511414624281} step=8208
2022-04-20 20:26.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.09 [info     ] CQL_20220420202325: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00033531342333520365, 'time_algorithm_update': 0.01859859207220245, 'temp_loss': 0.017900451342439092, 'temp': 0.7080495997130523, 'alpha_loss': 6.787878109000579, 'alpha': 0.6090656365567481, 'critic_loss': 329.73686878583584, 'actor_loss': 59.34381472715857, 'time_step': 0.019032356334708588, 'td_error': 47.62887201401728, 'init_value': -100.96568298339844, 'ave_value': -50.56012186545778} step=8550
2022-04-20 20:26.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.16 [info     ] CQL_20220420202325: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00032950702466462786, 'time_algorithm_update': 0.018326631066394827, 'temp_loss': 0.001338966261617273, 'temp': 0.7071459950410832, 'alpha_loss': 6.910900226113392, 'alpha': 0.5888153079308962, 'critic_loss': 341.8405742087559, 'actor_loss': 60.92060966937863, 'time_step': 0.018754432075902036, 'td_error': 49.7375074921882, 'init_value': -105.22122955322266, 'ave_value': -52.14927134035944} step=8892
2022-04-20 20:26.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.22 [info     ] CQL_20220420202325: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00033152382276211567, 'time_algorithm_update': 0.018180040588155824, 'temp_loss': -0.06973986966502771, 'temp': 0.709632091529188, 'alpha_loss': 6.996486407274391, 'alpha': 0.5688433500758389, 'critic_loss': 355.3877884714227, 'actor_loss': 62.478601924160074, 'time_step': 0.018614506860922652, 'td_error': 53.145411990072674, 'init_value': -106.92512512207031, 'ave_value': -53.24652105445261} step=9234
2022-04-20 20:26.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.29 [info     ] CQL_20220420202325: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.0003377422254685073, 'time_algorithm_update': 0.018279828523334703, 'temp_loss': -0.07627332192023246, 'temp': 0.7141525766305756, 'alpha_loss': 6.826995367195174, 'alpha': 0.5497984033927583, 'critic_loss': 369.90090237444605, 'actor_loss': 63.945607681720574, 'time_step': 0.018717400511802984, 'td_error': 54.314264282613365, 'init_value': -111.87166595458984, 'ave_value': -55.56596892247329} step=9576
2022-04-20 20:26.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.36 [info     ] CQL_20220420202325: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00033050670958401866, 'time_algorithm_update': 0.01809230743095889, 'temp_loss': -0.06944890146563101, 'temp': 0.7186829470403013, 'alpha_loss': 6.582422076610097, 'alpha': 0.5317958699680908, 'critic_loss': 383.16345241613556, 'actor_loss': 65.3281007287098, 'time_step': 0.01852381996244018, 'td_error': 53.88569844606563, 'init_value': -113.29374694824219, 'ave_value': -56.39625165529616} step=9918
2022-04-20 20:26.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.42 [info     ] CQL_20220420202325: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00032687256907859043, 'time_algorithm_update': 0.01830398408990157, 'temp_loss': -0.07482264139172104, 'temp': 0.7229424146880881, 'alpha_loss': 6.592913635990076, 'alpha': 0.5147913961034072, 'critic_loss': 393.684631704587, 'actor_loss': 66.78648064150448, 'time_step': 0.018728526712161058, 'td_error': 58.592857167583865, 'init_value': -116.1176528930664, 'ave_value': -57.938816935398556} step=10260
2022-04-20 20:26.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.49 [info     ] CQL_20220420202325: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003356201606884337, 'time_algorithm_update': 0.018291193839402226, 'temp_loss': -0.06948130627091952, 'temp': 0.7286149861171232, 'alpha_loss': 6.205680462352016, 'alpha': 0.49829137699994425, 'critic_loss': 407.50186389231544, 'actor_loss': 68.09554849591171, 'time_step': 0.018726125098111338, 'td_error': 59.56662796606336, 'init_value': -120.7154312133789, 'ave_value': -60.0162504177856} step=10602
2022-04-20 20:26.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:26.55 [info     ] CQL_20220420202325: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003335441065113447, 'time_algorithm_update': 0.018121605728104798, 'temp_loss': -0.06481184000530613, 'temp': 0.733178344734928, 'alpha_loss': 6.107220485893606, 'alpha': 0.48278134956694485, 'critic_loss': 417.50403733281365, 'actor_loss': 69.24610781530191, 'time_step': 0.018556552323681568, 'td_error': 61.46953512984678, 'init_value': -122.07674407958984, 'ave_value': -60.28210322915434} step=10944
2022-04-20 20:26.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.02 [info     ] CQL_20220420202325: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00033122823949445756, 'time_algorithm_update': 0.01821540740498325, 'temp_loss': -0.09742518174543716, 'temp': 0.7404931985843949, 'alpha_loss': 6.324655159175047, 'alpha': 0.46713181972852225, 'critic_loss': 428.27699163782665, 'actor_loss': 70.55544912327103, 'time_step': 0.018645676255923265, 'td_error': 60.64403489178026, 'init_value': -124.11946105957031, 'ave_value': -61.529093707544845} step=11286
2022-04-20 20:27.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.09 [info     ] CQL_20220420202325: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00033035543229844834, 'time_algorithm_update': 0.018316017256842718, 'temp_loss': -0.07323142103796378, 'temp': 0.7455023990737067, 'alpha_loss': 5.894737726066545, 'alpha': 0.4523694023751376, 'critic_loss': 441.26565774839526, 'actor_loss': 71.75843864574767, 'time_step': 0.018747626689442416, 'td_error': 64.20718678260752, 'init_value': -127.45039367675781, 'ave_value': -62.27070543744274} step=11628
2022-04-20 20:27.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.15 [info     ] CQL_20220420202325: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.000334860288608841, 'time_algorithm_update': 0.01817613108116284, 'temp_loss': -0.06802759096244274, 'temp': 0.7515691884777003, 'alpha_loss': 5.943914805239404, 'alpha': 0.43812075980574067, 'critic_loss': 452.775118197614, 'actor_loss': 72.9561312268352, 'time_step': 0.018610828104074936, 'td_error': 65.63269863865077, 'init_value': -130.23724365234375, 'ave_value': -63.067110372781755} step=11970
2022-04-20 20:27.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.22 [info     ] CQL_20220420202325: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003410034012376216, 'time_algorithm_update': 0.018336422959266352, 'temp_loss': -0.087691685277424, 'temp': 0.7581339272839284, 'alpha_loss': 5.83419378459105, 'alpha': 0.42405172032222416, 'critic_loss': 465.67359879700064, 'actor_loss': 74.05466837074324, 'time_step': 0.018777387881139567, 'td_error': 65.46718758851733, 'init_value': -132.32899475097656, 'ave_value': -63.891934841463154} step=12312
2022-04-20 20:27.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.29 [info     ] CQL_20220420202325: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.000337627198961046, 'time_algorithm_update': 0.018292254871792264, 'temp_loss': -0.04944548461237672, 'temp': 0.76371297222829, 'alpha_loss': 5.532078036787914, 'alpha': 0.41081785197146453, 'critic_loss': 478.3061535930076, 'actor_loss': 75.14194168403135, 'time_step': 0.018730497499655562, 'td_error': 65.51832860121166, 'init_value': -134.35861206054688, 'ave_value': -65.20717704912295} step=12654
2022-04-20 20:27.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.35 [info     ] CQL_20220420202325: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00033144295564171863, 'time_algorithm_update': 0.018199953419423244, 'temp_loss': -0.06293510971863193, 'temp': 0.7677481259867461, 'alpha_loss': 5.471947161077756, 'alpha': 0.3981293563605749, 'critic_loss': 488.6913309376142, 'actor_loss': 76.14369953445524, 'time_step': 0.018629159146582173, 'td_error': 67.78030493978461, 'init_value': -137.4792022705078, 'ave_value': -66.37183315448664} step=12996
2022-04-20 20:27.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.42 [info     ] CQL_20220420202325: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00033576795232226277, 'time_algorithm_update': 0.018189379346300983, 'temp_loss': -0.04026170930013671, 'temp': 0.7709565629736025, 'alpha_loss': 5.427626996012459, 'alpha': 0.3856835089928923, 'critic_loss': 501.01041345429, 'actor_loss': 77.21114145245468, 'time_step': 0.018623043222036974, 'td_error': 69.65392374674497, 'init_value': -138.513671875, 'ave_value': -66.16728774240694} step=13338
2022-04-20 20:27.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.48 [info     ] CQL_20220420202325: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00033014280754223204, 'time_algorithm_update': 0.01809759655891106, 'temp_loss': 0.006978197039611507, 'temp': 0.7726142812542051, 'alpha_loss': 5.094788211828087, 'alpha': 0.37368793576432946, 'critic_loss': 513.3330583181995, 'actor_loss': 78.2548019453796, 'time_step': 0.018526830868414272, 'td_error': 70.86891369098176, 'init_value': -141.15567016601562, 'ave_value': -68.24195663166476} step=13680
2022-04-20 20:27.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:27.55 [info     ] CQL_20220420202325: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00033421683729740613, 'time_algorithm_update': 0.01823914260195013, 'temp_loss': 0.001115100529066652, 'temp': 0.7728119834124694, 'alpha_loss': 5.023819596446746, 'alpha': 0.36223066053543873, 'critic_loss': 524.3555672628838, 'actor_loss': 79.1122824239452, 'time_step': 0.018675230399906984, 'td_error': 69.9347203188281, 'init_value': -142.35049438476562, 'ave_value': -68.02016965613038} step=14022
2022-04-20 20:27.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.02 [info     ] CQL_20220420202325: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00033165558039793494, 'time_algorithm_update': 0.018157127307869537, 'temp_loss': -0.01609862897523314, 'temp': 0.7729966394733965, 'alpha_loss': 4.928367685156259, 'alpha': 0.3512062605535775, 'critic_loss': 535.170812149494, 'actor_loss': 80.1106053625631, 'time_step': 0.01858761575486925, 'td_error': 71.30137600864813, 'init_value': -143.9326629638672, 'ave_value': -69.61126710440125} step=14364
2022-04-20 20:28.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.08 [info     ] CQL_20220420202325: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003322690551043951, 'time_algorithm_update': 0.018216432883725528, 'temp_loss': 0.008237770633303632, 'temp': 0.7735758375006112, 'alpha_loss': 4.751749676570558, 'alpha': 0.3402412328106618, 'critic_loss': 546.1506229869107, 'actor_loss': 81.01613892449274, 'time_step': 0.018645971142060576, 'td_error': 70.08874505252223, 'init_value': -147.27468872070312, 'ave_value': -70.9774548381379} step=14706
2022-04-20 20:28.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.15 [info     ] CQL_20220420202325: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.000331822891681515, 'time_algorithm_update': 0.018246889811510233, 'temp_loss': 0.025602161268872475, 'temp': 0.7713293110766606, 'alpha_loss': 4.526916235511066, 'alpha': 0.3300777276357015, 'critic_loss': 556.7340684857285, 'actor_loss': 81.74583651447854, 'time_step': 0.018679593738756682, 'td_error': 75.0917873756095, 'init_value': -148.99356079101562, 'ave_value': -72.47989050375314} step=15048
2022-04-20 20:28.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.22 [info     ] CQL_20220420202325: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00033407043992427356, 'time_algorithm_update': 0.01830673845190751, 'temp_loss': 0.012023231777095656, 'temp': 0.7701292637495967, 'alpha_loss': 4.525905907502648, 'alpha': 0.319851930291332, 'critic_loss': 567.5649414954827, 'actor_loss': 82.69167613425451, 'time_step': 0.018739830680757935, 'td_error': 77.39231549042286, 'init_value': -148.5699462890625, 'ave_value': -71.81679248835992} step=15390
2022-04-20 20:28.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.28 [info     ] CQL_20220420202325: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003320898926048948, 'time_algorithm_update': 0.018233011340537265, 'temp_loss': 0.002735050961549519, 'temp': 0.7698280474944421, 'alpha_loss': 4.300311591890123, 'alpha': 0.31012335140802705, 'critic_loss': 576.62956371642, 'actor_loss': 83.27149717989023, 'time_step': 0.01866487313432303, 'td_error': 75.25793809446688, 'init_value': -149.26528930664062, 'ave_value': -71.87931548847808} step=15732
2022-04-20 20:28.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.35 [info     ] CQL_20220420202325: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003336193965889557, 'time_algorithm_update': 0.018255182177002668, 'temp_loss': 0.003654507737693905, 'temp': 0.7694503956022318, 'alpha_loss': 4.180329048145584, 'alpha': 0.30074570428209696, 'critic_loss': 584.8597437986854, 'actor_loss': 84.05017596239234, 'time_step': 0.018690532411051074, 'td_error': 74.26022070186245, 'init_value': -151.5650634765625, 'ave_value': -73.41821692707168} step=16074
2022-04-20 20:28.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.42 [info     ] CQL_20220420202325: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003309040738825212, 'time_algorithm_update': 0.01819462316078052, 'temp_loss': 0.0009774633993705113, 'temp': 0.7691209595105801, 'alpha_loss': 4.101503053603814, 'alpha': 0.2916036267378177, 'critic_loss': 592.9879759849861, 'actor_loss': 84.69849981341446, 'time_step': 0.01862364623978821, 'td_error': 77.71459087095324, 'init_value': -153.49716186523438, 'ave_value': -74.32784403917086} step=16416
2022-04-20 20:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.48 [info     ] CQL_20220420202325: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033347021069443015, 'time_algorithm_update': 0.01808638949143259, 'temp_loss': 0.038151263085068665, 'temp': 0.7683556898992662, 'alpha_loss': 3.955799903088843, 'alpha': 0.2827342624029918, 'critic_loss': 602.4962017215483, 'actor_loss': 85.46876396491514, 'time_step': 0.018521324235793443, 'td_error': 78.20529404057604, 'init_value': -152.4642791748047, 'ave_value': -73.97137856984864} step=16758
2022-04-20 20:28.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:28.55 [info     ] CQL_20220420202325: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.0003355225624396787, 'time_algorithm_update': 0.018070584849307413, 'temp_loss': 0.04603784483971826, 'temp': 0.7652967523413094, 'alpha_loss': 3.713309177181177, 'alpha': 0.274228765514859, 'critic_loss': 610.9692257886742, 'actor_loss': 85.97632906729714, 'time_step': 0.018509434677704037, 'td_error': 75.77020360973437, 'init_value': -154.76974487304688, 'ave_value': -74.44238737800771} step=17100
2022-04-20 20:28.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420202325/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:28.55 [info     ] FQE_20220420202855: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014549996479448066, 'time_algorithm_update': 0.0020724592438663343, 'loss': 0.007738993306790131, 'time_step': 0.002288179225232228, 'init_value': -0.2563546299934387, 'ave_value': -0.2086343486902413, 'soft_opc': nan} step=166




2022-04-20 20:28.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.56 [info     ] FQE_20220420202855: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014683281082704844, 'time_algorithm_update': 0.0021056884742644898, 'loss': 0.006199319283085236, 'time_step': 0.0023196030812091136, 'init_value': -0.3757064938545227, 'ave_value': -0.2821208236151719, 'soft_opc': nan} step=332




2022-04-20 20:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.56 [info     ] FQE_20220420202855: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014484072306069983, 'time_algorithm_update': 0.002065941511866558, 'loss': 0.006065372874152409, 'time_step': 0.002280289868274367, 'init_value': -0.4318046569824219, 'ave_value': -0.3071063997609927, 'soft_opc': nan} step=498




2022-04-20 20:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.57 [info     ] FQE_20220420202855: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014787984181599445, 'time_algorithm_update': 0.0020512313727872916, 'loss': 0.005962829648359981, 'time_step': 0.002266977206770196, 'init_value': -0.5078966617584229, 'ave_value': -0.33553054246732644, 'soft_opc': nan} step=664




2022-04-20 20:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.57 [info     ] FQE_20220420202855: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014846439821174345, 'time_algorithm_update': 0.0020755888467811675, 'loss': 0.00593087817289788, 'time_step': 0.0022930897862078197, 'init_value': -0.599773645401001, 'ave_value': -0.39610588353060117, 'soft_opc': nan} step=830




2022-04-20 20:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.57 [info     ] FQE_20220420202855: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014707266566265062, 'time_algorithm_update': 0.0021244474204189807, 'loss': 0.005668006067343774, 'time_step': 0.002341411199914404, 'init_value': -0.6016441583633423, 'ave_value': -0.3855838197157592, 'soft_opc': nan} step=996




2022-04-20 20:28.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.58 [info     ] FQE_20220420202855: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001528435442821089, 'time_algorithm_update': 0.002091106162013778, 'loss': 0.005479862121023597, 'time_step': 0.0023173179971166403, 'init_value': -0.6617566347122192, 'ave_value': -0.4174906766318993, 'soft_opc': nan} step=1162




2022-04-20 20:28.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.58 [info     ] FQE_20220420202855: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001459064253841538, 'time_algorithm_update': 0.0020751148821359657, 'loss': 0.005142410619588322, 'time_step': 0.002291205417679017, 'init_value': -0.750281810760498, 'ave_value': -0.47835779638608567, 'soft_opc': nan} step=1328




2022-04-20 20:28.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.59 [info     ] FQE_20220420202855: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001483092825096774, 'time_algorithm_update': 0.002096907202019749, 'loss': 0.004741417562465621, 'time_step': 0.002317257674343615, 'init_value': -0.7837598919868469, 'ave_value': -0.5015978584485548, 'soft_opc': nan} step=1494




2022-04-20 20:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:28.59 [info     ] FQE_20220420202855: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001447990716221821, 'time_algorithm_update': 0.0020463696445327207, 'loss': 0.004707753694762128, 'time_step': 0.0022564551916467138, 'init_value': -0.8735780715942383, 'ave_value': -0.55569799746479, 'soft_opc': nan} step=1660




2022-04-20 20:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.00 [info     ] FQE_20220420202855: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001509103430322854, 'time_algorithm_update': 0.0021602848926222467, 'loss': 0.0043993735740645735, 'time_step': 0.0023868356842592538, 'init_value': -0.904219388961792, 'ave_value': -0.5674558295473994, 'soft_opc': nan} step=1826




2022-04-20 20:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.00 [info     ] FQE_20220420202855: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014963063849024027, 'time_algorithm_update': 0.0020956145711691983, 'loss': 0.004149942052219601, 'time_step': 0.0023161402667861387, 'init_value': -0.9448295831680298, 'ave_value': -0.5953757038381924, 'soft_opc': nan} step=1992




2022-04-20 20:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.00 [info     ] FQE_20220420202855: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001559903822749494, 'time_algorithm_update': 0.002089118383016931, 'loss': 0.0041893609646561905, 'time_step': 0.0023119607603693582, 'init_value': -1.0227316617965698, 'ave_value': -0.647997727204819, 'soft_opc': nan} step=2158




2022-04-20 20:29.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.01 [info     ] FQE_20220420202855: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014936205852462584, 'time_algorithm_update': 0.002069368419877018, 'loss': 0.004175375259890928, 'time_step': 0.002285858234727239, 'init_value': -1.0945990085601807, 'ave_value': -0.6898966899175231, 'soft_opc': nan} step=2324




2022-04-20 20:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.01 [info     ] FQE_20220420202855: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001528406717691077, 'time_algorithm_update': 0.002152045089078237, 'loss': 0.00397033394378868, 'time_step': 0.002379110060542463, 'init_value': -1.1549797058105469, 'ave_value': -0.731742599405147, 'soft_opc': nan} step=2490




2022-04-20 20:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.02 [info     ] FQE_20220420202855: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014530463391039744, 'time_algorithm_update': 0.002048699252576713, 'loss': 0.004399317635801138, 'time_step': 0.002263892127806882, 'init_value': -1.1897797584533691, 'ave_value': -0.7441679750527214, 'soft_opc': nan} step=2656




2022-04-20 20:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.02 [info     ] FQE_20220420202855: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015517889735210374, 'time_algorithm_update': 0.002138532787920481, 'loss': 0.003952723835224667, 'time_step': 0.0023647331329713383, 'init_value': -1.2628722190856934, 'ave_value': -0.7880027792341, 'soft_opc': nan} step=2822




2022-04-20 20:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.03 [info     ] FQE_20220420202855: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001501577446259648, 'time_algorithm_update': 0.0021657383585550697, 'loss': 0.004063778239897037, 'time_step': 0.0023884658353874482, 'init_value': -1.2844332456588745, 'ave_value': -0.7993028285871218, 'soft_opc': nan} step=2988




2022-04-20 20:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.03 [info     ] FQE_20220420202855: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015185683606618858, 'time_algorithm_update': 0.0021163584238075347, 'loss': 0.004297502683223146, 'time_step': 0.002341192888926311, 'init_value': -1.3412625789642334, 'ave_value': -0.8338205392732545, 'soft_opc': nan} step=3154




2022-04-20 20:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.03 [info     ] FQE_20220420202855: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001504378146435841, 'time_algorithm_update': 0.0021364645785595998, 'loss': 0.004187323493849352, 'time_step': 0.0023565866860998682, 'init_value': -1.4054147005081177, 'ave_value': -0.8821060658264738, 'soft_opc': nan} step=3320




2022-04-20 20:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.04 [info     ] FQE_20220420202855: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014631144971732633, 'time_algorithm_update': 0.002041451902274626, 'loss': 0.004426384280177663, 'time_step': 0.0022574045571936182, 'init_value': -1.4603359699249268, 'ave_value': -0.92355789379846, 'soft_opc': nan} step=3486




2022-04-20 20:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.04 [info     ] FQE_20220420202855: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001488220260803958, 'time_algorithm_update': 0.002050316477396402, 'loss': 0.004451157422792391, 'time_step': 0.002268515437482351, 'init_value': -1.5222687721252441, 'ave_value': -0.9484489354749654, 'soft_opc': nan} step=3652




2022-04-20 20:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.05 [info     ] FQE_20220420202855: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014704394053263837, 'time_algorithm_update': 0.0020653655730098128, 'loss': 0.0045292817809372155, 'time_step': 0.0022815954254334233, 'init_value': -1.5677481889724731, 'ave_value': -0.9794143085753931, 'soft_opc': nan} step=3818




2022-04-20 20:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.05 [info     ] FQE_20220420202855: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015252325908247247, 'time_algorithm_update': 0.002107703542134848, 'loss': 0.004669059257314776, 'time_step': 0.0023323369313435383, 'init_value': -1.6073687076568604, 'ave_value': -0.9997819926195384, 'soft_opc': nan} step=3984




2022-04-20 20:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.06 [info     ] FQE_20220420202855: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001485074859067618, 'time_algorithm_update': 0.002032397741294769, 'loss': 0.005010473988641682, 'time_step': 0.0022472387336822876, 'init_value': -1.6634371280670166, 'ave_value': -1.0261056652007339, 'soft_opc': nan} step=4150




2022-04-20 20:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.06 [info     ] FQE_20220420202855: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014395742531282357, 'time_algorithm_update': 0.002046290650425187, 'loss': 0.005212350115208067, 'time_step': 0.0022580336375408864, 'init_value': -1.8072015047073364, 'ave_value': -1.1615430995771612, 'soft_opc': nan} step=4316




2022-04-20 20:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.06 [info     ] FQE_20220420202855: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014496567737625306, 'time_algorithm_update': 0.0020647307476365424, 'loss': 0.005421275128498225, 'time_step': 0.002279735473265131, 'init_value': -1.851393461227417, 'ave_value': -1.192949243598022, 'soft_opc': nan} step=4482




2022-04-20 20:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.07 [info     ] FQE_20220420202855: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014759115425937147, 'time_algorithm_update': 0.0021206887371568793, 'loss': 0.00573061125953289, 'time_step': 0.00233818680407053, 'init_value': -1.92104172706604, 'ave_value': -1.2478765461526744, 'soft_opc': nan} step=4648




2022-04-20 20:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.07 [info     ] FQE_20220420202855: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014437250344150038, 'time_algorithm_update': 0.0020695407706570914, 'loss': 0.00606698513939887, 'time_step': 0.0022864657712269977, 'init_value': -1.9812432527542114, 'ave_value': -1.282496580646818, 'soft_opc': nan} step=4814




2022-04-20 20:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.08 [info     ] FQE_20220420202855: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014978000916630388, 'time_algorithm_update': 0.00205967799726739, 'loss': 0.0066185123420387775, 'time_step': 0.0022786439183246658, 'init_value': -1.9981517791748047, 'ave_value': -1.2984120588145547, 'soft_opc': nan} step=4980




2022-04-20 20:29.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.08 [info     ] FQE_20220420202855: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001471358609486775, 'time_algorithm_update': 0.0020811687032860444, 'loss': 0.006460770236676088, 'time_step': 0.002297015075223992, 'init_value': -2.0459513664245605, 'ave_value': -1.3085387064311822, 'soft_opc': nan} step=5146




2022-04-20 20:29.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.09 [info     ] FQE_20220420202855: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014819869075913028, 'time_algorithm_update': 0.002074214349310082, 'loss': 0.006813478728346751, 'time_step': 0.00229189338454281, 'init_value': -2.112321615219116, 'ave_value': -1.3781083216941035, 'soft_opc': nan} step=5312




2022-04-20 20:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.09 [info     ] FQE_20220420202855: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001456766243440559, 'time_algorithm_update': 0.002037543848336461, 'loss': 0.006690353409706962, 'time_step': 0.0022494879113622458, 'init_value': -2.144469738006592, 'ave_value': -1.3837735891946263, 'soft_opc': nan} step=5478




2022-04-20 20:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.09 [info     ] FQE_20220420202855: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014933764216411546, 'time_algorithm_update': 0.0021006644490253494, 'loss': 0.007456855107160532, 'time_step': 0.0023206759648150706, 'init_value': -2.258871078491211, 'ave_value': -1.519614530028356, 'soft_opc': nan} step=5644




2022-04-20 20:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.10 [info     ] FQE_20220420202855: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001521153622362987, 'time_algorithm_update': 0.002092115850333708, 'loss': 0.007586540833132409, 'time_step': 0.0023149898253291487, 'init_value': -2.255413293838501, 'ave_value': -1.4832986178631717, 'soft_opc': nan} step=5810




2022-04-20 20:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.10 [info     ] FQE_20220420202855: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014649529054940464, 'time_algorithm_update': 0.002092846904892519, 'loss': 0.007753179215051862, 'time_step': 0.0023109079843544097, 'init_value': -2.2704124450683594, 'ave_value': -1.4942116893316106, 'soft_opc': nan} step=5976




2022-04-20 20:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.11 [info     ] FQE_20220420202855: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.000148066555161074, 'time_algorithm_update': 0.0020535265106752694, 'loss': 0.008069657671534601, 'time_step': 0.0022706267345382505, 'init_value': -2.339141845703125, 'ave_value': -1.5706999975583844, 'soft_opc': nan} step=6142




2022-04-20 20:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.11 [info     ] FQE_20220420202855: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015145181173301605, 'time_algorithm_update': 0.0021141724414136037, 'loss': 0.008209913776183209, 'time_step': 0.002337263291140637, 'init_value': -2.2197439670562744, 'ave_value': -1.486178215972341, 'soft_opc': nan} step=6308




2022-04-20 20:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.12 [info     ] FQE_20220420202855: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014845290815973855, 'time_algorithm_update': 0.0020205514976777226, 'loss': 0.008485648904489466, 'time_step': 0.0022372711135680416, 'init_value': -2.3274002075195312, 'ave_value': -1.5572241155254478, 'soft_opc': nan} step=6474




2022-04-20 20:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.12 [info     ] FQE_20220420202855: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001486898904823395, 'time_algorithm_update': 0.002105557774922934, 'loss': 0.00905407954733931, 'time_step': 0.002324260861040598, 'init_value': -2.3180179595947266, 'ave_value': -1.5432285533429266, 'soft_opc': nan} step=6640




2022-04-20 20:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.12 [info     ] FQE_20220420202855: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001494511064276638, 'time_algorithm_update': 0.00210841305284615, 'loss': 0.008819288554522827, 'time_step': 0.002331812697720815, 'init_value': -2.3864524364471436, 'ave_value': -1.6307717220991984, 'soft_opc': nan} step=6806




2022-04-20 20:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.13 [info     ] FQE_20220420202855: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014716458607868976, 'time_algorithm_update': 0.0020769690892782555, 'loss': 0.009255694301170578, 'time_step': 0.002294642379484981, 'init_value': -2.475594997406006, 'ave_value': -1.711002458819096, 'soft_opc': nan} step=6972




2022-04-20 20:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.13 [info     ] FQE_20220420202855: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014820587204163334, 'time_algorithm_update': 0.0020759407296238176, 'loss': 0.01001103820506461, 'time_step': 0.0022957698408379613, 'init_value': -2.563053607940674, 'ave_value': -1.803797411432003, 'soft_opc': nan} step=7138




2022-04-20 20:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.14 [info     ] FQE_20220420202855: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015210818095379565, 'time_algorithm_update': 0.002121122486620064, 'loss': 0.010121419618371874, 'time_step': 0.0023434363215802663, 'init_value': -2.6654365062713623, 'ave_value': -1.8767047492669777, 'soft_opc': nan} step=7304




2022-04-20 20:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.14 [info     ] FQE_20220420202855: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015124786330992916, 'time_algorithm_update': 0.002103987946567765, 'loss': 0.010417507638795728, 'time_step': 0.0023251355412494705, 'init_value': -2.6683030128479004, 'ave_value': -1.8799514609414179, 'soft_opc': nan} step=7470




2022-04-20 20:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.15 [info     ] FQE_20220420202855: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001533534153398261, 'time_algorithm_update': 0.002117619457015072, 'loss': 0.010899285515854472, 'time_step': 0.002340976014194718, 'init_value': -2.713195562362671, 'ave_value': -1.920187538852999, 'soft_opc': nan} step=7636




2022-04-20 20:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.15 [info     ] FQE_20220420202855: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001485175397022661, 'time_algorithm_update': 0.002100104308990111, 'loss': 0.01126192988552916, 'time_step': 0.0023162336234586784, 'init_value': -2.7930402755737305, 'ave_value': -2.00512053442884, 'soft_opc': nan} step=7802




2022-04-20 20:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.15 [info     ] FQE_20220420202855: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001509448131883001, 'time_algorithm_update': 0.002121478678232216, 'loss': 0.011699429362541041, 'time_step': 0.002341984266258148, 'init_value': -2.817441463470459, 'ave_value': -2.0367242566083332, 'soft_opc': nan} step=7968




2022-04-20 20:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.16 [info     ] FQE_20220420202855: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001455660325935088, 'time_algorithm_update': 0.0020057307668479092, 'loss': 0.012562657842491136, 'time_step': 0.0022163778902536415, 'init_value': -2.8769400119781494, 'ave_value': -2.0574078744126334, 'soft_opc': nan} step=8134




2022-04-20 20:29.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:29.16 [info     ] FQE_20220420202855: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014837822282170676, 'time_algorithm_update': 0.002062708498483681, 'loss': 0.012845632766025612, 'time_step': 0.0022760342402630544, 'init_value': -2.9523117542266846, 'ave_value': -2.1410567298615435, 'soft_opc': nan} step=8300




2022-04-20 20:29.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202855/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-20 20:29.17 [debug    ] RoundIterator is selected.
2022-04-20 20:29.17 [info     ] Directory is created at d3rlpy_logs/FQE_20220420202917
2022-04-20 20:29.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:29.17 [debug    ] Building models...
2022-04-20 20:29.17 [debug    ] Models have been built.
2022-04-20 20:29.17 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420202917/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:29.18 [info     ] FQE_20220420202917: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015442939691765365, 'time_algorithm_update': 0.0021419809308162955, 'loss': 0.023922350493173095, 'time_step': 0.0023692922536716903, 'init_value': -1.0568047761917114, 'ave_value': -1.061199296104747, 'soft_opc': nan} step=344




2022-04-20 20:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.18 [info     ] FQE_20220420202917: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015508088954659395, 'time_algorithm_update': 0.002051065827524939, 'loss': 0.02187289099674672, 'time_step': 0.0022787874521211136, 'init_value': -1.897391676902771, 'ave_value': -1.8907413817069552, 'soft_opc': nan} step=688




2022-04-20 20:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.19 [info     ] FQE_20220420202917: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015335027561631313, 'time_algorithm_update': 0.0020667363044827485, 'loss': 0.025283182884544828, 'time_step': 0.0022920363171156062, 'init_value': -3.0041608810424805, 'ave_value': -2.9806437047483683, 'soft_opc': nan} step=1032




2022-04-20 20:29.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.20 [info     ] FQE_20220420202917: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015623624934706578, 'time_algorithm_update': 0.002126078966052033, 'loss': 0.027155887327416865, 'time_step': 0.0023541554462078004, 'init_value': -3.7102293968200684, 'ave_value': -3.681332680180266, 'soft_opc': nan} step=1376




2022-04-20 20:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.21 [info     ] FQE_20220420202917: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015233491742333701, 'time_algorithm_update': 0.0020335226557975593, 'loss': 0.03190547717886782, 'time_step': 0.0022540688514709473, 'init_value': -4.6774821281433105, 'ave_value': -4.656242127206411, 'soft_opc': nan} step=1720




2022-04-20 20:29.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.22 [info     ] FQE_20220420202917: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015486603559449662, 'time_algorithm_update': 0.0020821780659431633, 'loss': 0.03744541642223593, 'time_step': 0.002310486726982649, 'init_value': -5.341893672943115, 'ave_value': -5.365980967369166, 'soft_opc': nan} step=2064




2022-04-20 20:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.23 [info     ] FQE_20220420202917: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.000154207612192908, 'time_algorithm_update': 0.0021028955315434655, 'loss': 0.04758423976166997, 'time_step': 0.0023309165655180466, 'init_value': -6.215432167053223, 'ave_value': -6.345578876178007, 'soft_opc': nan} step=2408




2022-04-20 20:29.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.24 [info     ] FQE_20220420202917: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015653150026188341, 'time_algorithm_update': 0.0020640062731365825, 'loss': 0.05800908460649987, 'time_step': 0.0022941398066143657, 'init_value': -6.803183555603027, 'ave_value': -7.066883032493763, 'soft_opc': nan} step=2752




2022-04-20 20:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.24 [info     ] FQE_20220420202917: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015161273091338401, 'time_algorithm_update': 0.002044723477474479, 'loss': 0.07105741925201876, 'time_step': 0.0022677883159282594, 'init_value': -7.2820587158203125, 'ave_value': -7.709193894629543, 'soft_opc': nan} step=3096




2022-04-20 20:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.25 [info     ] FQE_20220420202917: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015471425167349882, 'time_algorithm_update': 0.002064742321191832, 'loss': 0.08886185010386155, 'time_step': 0.002292442460392797, 'init_value': -8.031564712524414, 'ave_value': -8.663119664264691, 'soft_opc': nan} step=3440




2022-04-20 20:29.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.26 [info     ] FQE_20220420202917: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001504206380178762, 'time_algorithm_update': 0.0020785872326340785, 'loss': 0.10220672184774696, 'time_step': 0.0022984770841376727, 'init_value': -8.421768188476562, 'ave_value': -9.158313342123419, 'soft_opc': nan} step=3784




2022-04-20 20:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.27 [info     ] FQE_20220420202917: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015224481737890908, 'time_algorithm_update': 0.002044934866040252, 'loss': 0.12155701251982083, 'time_step': 0.002269160608912623, 'init_value': -9.086688995361328, 'ave_value': -10.047959765228065, 'soft_opc': nan} step=4128




2022-04-20 20:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.28 [info     ] FQE_20220420202917: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001549505910208059, 'time_algorithm_update': 0.0020698613898698674, 'loss': 0.14271312692408386, 'time_step': 0.0022941307966099227, 'init_value': -9.693391799926758, 'ave_value': -10.772201794828918, 'soft_opc': nan} step=4472




2022-04-20 20:29.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.29 [info     ] FQE_20220420202917: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015421592912008596, 'time_algorithm_update': 0.002059066018392873, 'loss': 0.16269192720633432, 'time_step': 0.0022848310858704325, 'init_value': -10.402091026306152, 'ave_value': -11.686508052883385, 'soft_opc': nan} step=4816




2022-04-20 20:29.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.30 [info     ] FQE_20220420202917: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00014967211457185968, 'time_algorithm_update': 0.0020636236944863964, 'loss': 0.1850056968423603, 'time_step': 0.002279880434967751, 'init_value': -10.863801002502441, 'ave_value': -12.250332773805738, 'soft_opc': nan} step=5160




2022-04-20 20:29.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.30 [info     ] FQE_20220420202917: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015321997709052506, 'time_algorithm_update': 0.0020683393921963004, 'loss': 0.20630239948716966, 'time_step': 0.00229312306226686, 'init_value': -11.367203712463379, 'ave_value': -12.999420853697503, 'soft_opc': nan} step=5504




2022-04-20 20:29.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.31 [info     ] FQE_20220420202917: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015367047731266466, 'time_algorithm_update': 0.0020773230597030284, 'loss': 0.2228776536892753, 'time_step': 0.002304106257682623, 'init_value': -11.655895233154297, 'ave_value': -13.437193800508975, 'soft_opc': nan} step=5848




2022-04-20 20:29.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.32 [info     ] FQE_20220420202917: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015658486721127531, 'time_algorithm_update': 0.0020857772161794264, 'loss': 0.23486756211211687, 'time_step': 0.002315207276233407, 'init_value': -12.117364883422852, 'ave_value': -14.03255992996263, 'soft_opc': nan} step=6192




2022-04-20 20:29.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.33 [info     ] FQE_20220420202917: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015267244605130927, 'time_algorithm_update': 0.0020921854085700457, 'loss': 0.25310686731786836, 'time_step': 0.002317802157512931, 'init_value': -12.598699569702148, 'ave_value': -14.69581504576915, 'soft_opc': nan} step=6536




2022-04-20 20:29.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.34 [info     ] FQE_20220420202917: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001520722411399664, 'time_algorithm_update': 0.0020759646282639613, 'loss': 0.266789298520835, 'time_step': 0.0022998881894488667, 'init_value': -12.849250793457031, 'ave_value': -15.22833085962244, 'soft_opc': nan} step=6880




2022-04-20 20:29.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.35 [info     ] FQE_20220420202917: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015711507131887037, 'time_algorithm_update': 0.00208943389182867, 'loss': 0.28105276896683284, 'time_step': 0.002319066330443981, 'init_value': -13.415847778320312, 'ave_value': -16.03360416830123, 'soft_opc': nan} step=7224




2022-04-20 20:29.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.36 [info     ] FQE_20220420202917: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001512322314949923, 'time_algorithm_update': 0.0020581268986990284, 'loss': 0.2945340085709684, 'time_step': 0.0022808486639067185, 'init_value': -13.598867416381836, 'ave_value': -16.430689838434663, 'soft_opc': nan} step=7568




2022-04-20 20:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.37 [info     ] FQE_20220420202917: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015656476797059525, 'time_algorithm_update': 0.002076509386994118, 'loss': 0.30737232639952455, 'time_step': 0.002309177504029385, 'init_value': -13.743518829345703, 'ave_value': -16.92993168691257, 'soft_opc': nan} step=7912




2022-04-20 20:29.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.37 [info     ] FQE_20220420202917: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015366077423095703, 'time_algorithm_update': 0.002101527397022691, 'loss': 0.31474318543752267, 'time_step': 0.002326868301214174, 'init_value': -13.950895309448242, 'ave_value': -17.183941726708735, 'soft_opc': nan} step=8256




2022-04-20 20:29.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.38 [info     ] FQE_20220420202917: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.000154819599417753, 'time_algorithm_update': 0.0020868611890216206, 'loss': 0.3245730321464497, 'time_step': 0.0023138592409533126, 'init_value': -14.228415489196777, 'ave_value': -17.715605119354016, 'soft_opc': nan} step=8600




2022-04-20 20:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.39 [info     ] FQE_20220420202917: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015515851420025493, 'time_algorithm_update': 0.0020978111167286716, 'loss': 0.3414611843642021, 'time_step': 0.002325737199117971, 'init_value': -14.748147964477539, 'ave_value': -18.38652463020371, 'soft_opc': nan} step=8944




2022-04-20 20:29.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.40 [info     ] FQE_20220420202917: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015356512956841048, 'time_algorithm_update': 0.0020585018534993015, 'loss': 0.35805377743185257, 'time_step': 0.002279985782712005, 'init_value': -14.981729507446289, 'ave_value': -18.640249786197064, 'soft_opc': nan} step=9288




2022-04-20 20:29.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.41 [info     ] FQE_20220420202917: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015367810116257777, 'time_algorithm_update': 0.0020385453867357833, 'loss': 0.36507975992335134, 'time_step': 0.0022615298282268434, 'init_value': -14.71314811706543, 'ave_value': -18.460237725942, 'soft_opc': nan} step=9632




2022-04-20 20:29.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.42 [info     ] FQE_20220420202917: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015652318333470545, 'time_algorithm_update': 0.002120209294696187, 'loss': 0.37650044250496945, 'time_step': 0.00235081204148226, 'init_value': -15.19776439666748, 'ave_value': -18.908149540274998, 'soft_opc': nan} step=9976




2022-04-20 20:29.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.43 [info     ] FQE_20220420202917: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015412998753924702, 'time_algorithm_update': 0.002094297908073248, 'loss': 0.38380327938817616, 'time_step': 0.0023192153420559195, 'init_value': -15.728080749511719, 'ave_value': -19.54436242529904, 'soft_opc': nan} step=10320




2022-04-20 20:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.43 [info     ] FQE_20220420202917: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015761824541313704, 'time_algorithm_update': 0.0020956708941348764, 'loss': 0.40232429378341106, 'time_step': 0.0023265598818313243, 'init_value': -16.26578140258789, 'ave_value': -20.131501059551294, 'soft_opc': nan} step=10664




2022-04-20 20:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.44 [info     ] FQE_20220420202917: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001548126686451047, 'time_algorithm_update': 0.0020445938720259557, 'loss': 0.42157307381979947, 'time_step': 0.002269626356834589, 'init_value': -16.95849609375, 'ave_value': -20.77021049720881, 'soft_opc': nan} step=11008




2022-04-20 20:29.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.45 [info     ] FQE_20220420202917: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000153416117956472, 'time_algorithm_update': 0.0020620691221813823, 'loss': 0.4400296361012341, 'time_step': 0.00228581941405008, 'init_value': -17.2702693939209, 'ave_value': -21.161869273247483, 'soft_opc': nan} step=11352




2022-04-20 20:29.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.46 [info     ] FQE_20220420202917: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015344245489253553, 'time_algorithm_update': 0.002064469248749489, 'loss': 0.4557031172349356, 'time_step': 0.0022896160913068193, 'init_value': -17.554840087890625, 'ave_value': -21.637768308804915, 'soft_opc': nan} step=11696




2022-04-20 20:29.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.47 [info     ] FQE_20220420202917: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015143530313358752, 'time_algorithm_update': 0.0020581234333127045, 'loss': 0.47644595166689957, 'time_step': 0.0022815507511759914, 'init_value': -18.250247955322266, 'ave_value': -22.51439202076143, 'soft_opc': nan} step=12040




2022-04-20 20:29.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.48 [info     ] FQE_20220420202917: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015188025873760845, 'time_algorithm_update': 0.0020598651364792226, 'loss': 0.4863866874731557, 'time_step': 0.002283975828525632, 'init_value': -18.043411254882812, 'ave_value': -22.306798795818747, 'soft_opc': nan} step=12384




2022-04-20 20:29.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.49 [info     ] FQE_20220420202917: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015564644059469534, 'time_algorithm_update': 0.0020948787068211755, 'loss': 0.48777060849635406, 'time_step': 0.0023227597391882607, 'init_value': -18.04682159423828, 'ave_value': -22.567419910632275, 'soft_opc': nan} step=12728




2022-04-20 20:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.49 [info     ] FQE_20220420202917: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001536995865577875, 'time_algorithm_update': 0.002063205075818439, 'loss': 0.5013721457541769, 'time_step': 0.0022894442081451416, 'init_value': -18.052627563476562, 'ave_value': -22.6244080256268, 'soft_opc': nan} step=13072




2022-04-20 20:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.50 [info     ] FQE_20220420202917: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015502267105634823, 'time_algorithm_update': 0.0020639501338781314, 'loss': 0.5105583566717457, 'time_step': 0.0022928097913431566, 'init_value': -18.50766944885254, 'ave_value': -23.127640000113228, 'soft_opc': nan} step=13416




2022-04-20 20:29.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.51 [info     ] FQE_20220420202917: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015650447024855504, 'time_algorithm_update': 0.002100764318954113, 'loss': 0.5224223601967529, 'time_step': 0.002332875894945721, 'init_value': -18.072418212890625, 'ave_value': -22.888720436279687, 'soft_opc': nan} step=13760




2022-04-20 20:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.52 [info     ] FQE_20220420202917: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001520216464996338, 'time_algorithm_update': 0.002046665479970533, 'loss': 0.5355677592438147, 'time_step': 0.0022693844728691633, 'init_value': -18.728134155273438, 'ave_value': -23.618077840031805, 'soft_opc': nan} step=14104




2022-04-20 20:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.53 [info     ] FQE_20220420202917: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015552792438240938, 'time_algorithm_update': 0.002107111520545427, 'loss': 0.5546584272551415, 'time_step': 0.00233526909074118, 'init_value': -19.004972457885742, 'ave_value': -23.91532847341318, 'soft_opc': nan} step=14448




2022-04-20 20:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.54 [info     ] FQE_20220420202917: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015203204265860625, 'time_algorithm_update': 0.0020705239717350448, 'loss': 0.5774318296245711, 'time_step': 0.0022952383340791214, 'init_value': -19.0255126953125, 'ave_value': -24.075382157307757, 'soft_opc': nan} step=14792




2022-04-20 20:29.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.55 [info     ] FQE_20220420202917: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015333294868469238, 'time_algorithm_update': 0.002092559670293054, 'loss': 0.5841210572059851, 'time_step': 0.0023213992285173995, 'init_value': -18.918188095092773, 'ave_value': -23.97541164511454, 'soft_opc': nan} step=15136




2022-04-20 20:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.56 [info     ] FQE_20220420202917: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015260105909303178, 'time_algorithm_update': 0.002045175363851148, 'loss': 0.5984287727732471, 'time_step': 0.0022696180399074113, 'init_value': -19.211868286132812, 'ave_value': -24.184502608010227, 'soft_opc': nan} step=15480




2022-04-20 20:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.56 [info     ] FQE_20220420202917: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015485771866731866, 'time_algorithm_update': 0.0020724729050037474, 'loss': 0.6052562688141628, 'time_step': 0.0022989213466644287, 'init_value': -19.23198890686035, 'ave_value': -24.368365661162244, 'soft_opc': nan} step=15824




2022-04-20 20:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.57 [info     ] FQE_20220420202917: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015474890553674034, 'time_algorithm_update': 0.0020369970521261524, 'loss': 0.618184651004497, 'time_step': 0.0022641857003056725, 'init_value': -19.165569305419922, 'ave_value': -24.126913998594826, 'soft_opc': nan} step=16168




2022-04-20 20:29.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.58 [info     ] FQE_20220420202917: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015486187713090763, 'time_algorithm_update': 0.0020874939685644107, 'loss': 0.6305526678916067, 'time_step': 0.002313943103302357, 'init_value': -19.323257446289062, 'ave_value': -24.280510787429236, 'soft_opc': nan} step=16512




2022-04-20 20:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:29.59 [info     ] FQE_20220420202917: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015161550322244333, 'time_algorithm_update': 0.002021639153014782, 'loss': 0.6440140366835823, 'time_step': 0.0022454046925833057, 'init_value': -19.659666061401367, 'ave_value': -24.606019319714726, 'soft_opc': nan} step=16856




2022-04-20 20:29.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:30.00 [info     ] FQE_20220420202917: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015638734019079873, 'time_algorithm_update': 0.0020463036936382915, 'loss': 0.6755269547642837, 'time_step': 0.0022758834583814754, 'init_value': -20.015111923217773, 'ave_value': -24.86858407080442, 'soft_opc': nan} step=17200




2022-04-20 20:30.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420202917/model_17200.pt
search iteration:  37
using hyper params:  [0.009272732576456291, 0.00488134444526905, 1.1933030412417289e-05, 1]
2022-04-20 20:30.00 [debug    ] RoundIterator is selected.
2022-04-20 20:30.00 [info     ] Directory is created at d3rlpy_logs/CQL_20220420203000
2022-04-20 20:30.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:30.00 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:30.00 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420203000/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.009272732576456291, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'we

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.06 [info     ] CQL_20220420203000: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00030206657989680416, 'time_algorithm_update': 0.018076757241410817, 'temp_loss': 4.589763339848546, 'temp': 0.9980763449654941, 'alpha_loss': -12.711568185460498, 'alpha': 1.0158596481496132, 'critic_loss': 18.07557407457229, 'actor_loss': -1.0373108544937002, 'time_step': 0.01847630495216414, 'td_error': 4.433598001059918, 'init_value': -1.9541347026824951, 'ave_value': -0.0029259394585817783} step=342
2022-04-20 20:30.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.13 [info     ] CQL_20220420203000: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.00030514998742711474, 'time_algorithm_update': 0.018257163421452394, 'temp_loss': 3.6516275726563747, 'temp': 0.9943835951431453, 'alpha_loss': -4.475011545315123, 'alpha': 1.0378539827134874, 'critic_loss': 26.086365683036938, 'actor_loss': 0.935141245894439, 'time_step': 0.018663402189288223, 'td_error': 5.168474686391299, 'init_value': -3.235430955886841, 'ave_value': 0.05189405254549808} step=684
2022-04-20 20:30.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.20 [info     ] CQL_20220420203000: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003036462772659391, 'time_algorithm_update': 0.018070598094784028, 'temp_loss': 2.844209957541081, 'temp': 0.9911855853091903, 'alpha_loss': 0.2860155975495803, 'alpha': 1.0452180462971068, 'critic_loss': 47.63341091669094, 'actor_loss': 2.4674607557162904, 'time_step': 0.018470049601549295, 'td_error': 7.339257623358773, 'init_value': -6.666055202484131, 'ave_value': -0.9506470068174977} step=1026
2022-04-20 20:30.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.26 [info     ] CQL_20220420203000: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.00030445494846990934, 'time_algorithm_update': 0.01815811444444266, 'temp_loss': 2.320692287202467, 'temp': 0.9882728117599822, 'alpha_loss': 3.6085342620199885, 'alpha': 1.0342366092386301, 'critic_loss': 74.13325654414662, 'actor_loss': 4.012325629853366, 'time_step': 0.018563653990539195, 'td_error': 8.29871233661931, 'init_value': -8.61095905303955, 'ave_value': -1.3995767886668176} step=1368
2022-04-20 20:30.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.33 [info     ] CQL_20220420203000: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.00030723231577733804, 'time_algorithm_update': 0.01804633447301318, 'temp_loss': 1.9375643015604966, 'temp': 0.985554381769303, 'alpha_loss': 5.9270576461713915, 'alpha': 1.0058871159079479, 'critic_loss': 105.5670456691095, 'actor_loss': 5.629467683228833, 'time_step': 0.01845209500943011, 'td_error': 11.82436602603898, 'init_value': -12.319555282592773, 'ave_value': -2.373483168525739} step=1710
2022-04-20 20:30.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.39 [info     ] CQL_20220420203000: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00030334720834653976, 'time_algorithm_update': 0.018050905556706656, 'temp_loss': 1.6185508966445923, 'temp': 0.9829946660507493, 'alpha_loss': 7.6560742074286035, 'alpha': 0.9673320302489208, 'critic_loss': 141.09628369515403, 'actor_loss': 7.324713715335779, 'time_step': 0.01845225883506195, 'td_error': 16.914118398649613, 'init_value': -16.569381713867188, 'ave_value': -3.7427729058077746} step=2052
2022-04-20 20:30.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.46 [info     ] CQL_20220420203000: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003074309979265893, 'time_algorithm_update': 0.018091566381398697, 'temp_loss': 1.3484162942707887, 'temp': 0.9805762623137201, 'alpha_loss': 8.877749614548264, 'alpha': 0.9269633504033786, 'critic_loss': 182.22981944837068, 'actor_loss': 9.305908239375778, 'time_step': 0.018500284144752903, 'td_error': 17.8419022201206, 'init_value': -20.253170013427734, 'ave_value': -5.391099628709458} step=2394
2022-04-20 20:30.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.52 [info     ] CQL_20220420203000: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00027763634397272476, 'time_algorithm_update': 0.01596655413421274, 'temp_loss': 1.1092085953780084, 'temp': 0.9783110236912443, 'alpha_loss': 9.965547215868854, 'alpha': 0.8879558693247231, 'critic_loss': 228.86617493211176, 'actor_loss': 11.538961958466915, 'time_step': 0.016332878703959504, 'td_error': 25.99181312336012, 'init_value': -25.064056396484375, 'ave_value': -7.1698353219032285} step=2736
2022-04-20 20:30.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:30.58 [info     ] CQL_20220420203000: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.000298555134332668, 'time_algorithm_update': 0.017874969376458064, 'temp_loss': 0.8710699395565261, 'temp': 0.9762489290265312, 'alpha_loss': 10.533081479937012, 'alpha': 0.8512516929740794, 'critic_loss': 280.01933230572973, 'actor_loss': 13.777973805254662, 'time_step': 0.018271015401472124, 'td_error': 23.69704383168262, 'init_value': -29.089990615844727, 'ave_value': -8.86067653454102} step=3078
2022-04-20 20:30.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.05 [info     ] CQL_20220420203000: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003026277698271456, 'time_algorithm_update': 0.017970912637766342, 'temp_loss': 0.7271461122565799, 'temp': 0.9743370310953486, 'alpha_loss': 11.013433535893759, 'alpha': 0.8175827618579419, 'critic_loss': 331.27203993769416, 'actor_loss': 16.008184460868613, 'time_step': 0.01837401710755644, 'td_error': 26.125132253959425, 'init_value': -34.26752471923828, 'ave_value': -10.339471582201151} step=3420
2022-04-20 20:31.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.11 [info     ] CQL_20220420203000: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0002988646602072911, 'time_algorithm_update': 0.017916111221090394, 'temp_loss': 0.5227011303847645, 'temp': 0.9726174886812243, 'alpha_loss': 11.453968220984029, 'alpha': 0.7860460769363314, 'critic_loss': 385.9669751619038, 'actor_loss': 18.538524608165897, 'time_step': 0.018311154075533326, 'td_error': 31.540001893750045, 'init_value': -38.63105010986328, 'ave_value': -13.18045536770477} step=3762
2022-04-20 20:31.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.18 [info     ] CQL_20220420203000: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.00030515556446990077, 'time_algorithm_update': 0.017984261289674636, 'temp_loss': 0.33993259406708487, 'temp': 0.9712899625301361, 'alpha_loss': 11.744468557904339, 'alpha': 0.75636310012717, 'critic_loss': 443.8010160211931, 'actor_loss': 21.00554255435341, 'time_step': 0.018387109912626924, 'td_error': 33.92455698570083, 'init_value': -45.7858772277832, 'ave_value': -16.17872296919157} step=4104
2022-04-20 20:31.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.24 [info     ] CQL_20220420203000: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.000304320402312697, 'time_algorithm_update': 0.01786836406640839, 'temp_loss': 0.18487952188833764, 'temp': 0.9703378998048124, 'alpha_loss': 12.051183009008218, 'alpha': 0.7284589320944067, 'critic_loss': 500.2042455840529, 'actor_loss': 23.411866857294452, 'time_step': 0.018269441978276124, 'td_error': 39.49341043025995, 'init_value': -49.27918243408203, 'ave_value': -17.35160249760559} step=4446
2022-04-20 20:31.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.31 [info     ] CQL_20220420203000: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00030723649855942753, 'time_algorithm_update': 0.01783724486479285, 'temp_loss': 0.05197647457931474, 'temp': 0.9698348479312763, 'alpha_loss': 12.315991179984913, 'alpha': 0.7017860212172681, 'critic_loss': 559.9080201087639, 'actor_loss': 26.124936734026637, 'time_step': 0.018240979540417767, 'td_error': 55.563400670694634, 'init_value': -55.60492706298828, 'ave_value': -20.50058643143993} step=4788
2022-04-20 20:31.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.37 [info     ] CQL_20220420203000: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00030337857921221105, 'time_algorithm_update': 0.017963164731075888, 'temp_loss': -0.003647869534529092, 'temp': 0.9697019708784003, 'alpha_loss': 12.216061762201855, 'alpha': 0.6769393102118844, 'critic_loss': 622.7254488760965, 'actor_loss': 28.926179015845584, 'time_step': 0.018363934511329696, 'td_error': 96.12519098991277, 'init_value': -60.3363151550293, 'ave_value': -21.512438830840694} step=5130
2022-04-20 20:31.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.44 [info     ] CQL_20220420203000: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00030393000931767695, 'time_algorithm_update': 0.01788454794744302, 'temp_loss': -0.028370586218453987, 'temp': 0.9697936044798957, 'alpha_loss': 11.851540176491989, 'alpha': 0.6535380745840351, 'critic_loss': 693.7545933416712, 'actor_loss': 31.869799876073646, 'time_step': 0.018290090282060946, 'td_error': 64.39903036038628, 'init_value': -66.95814514160156, 'ave_value': -24.469421548515825} step=5472
2022-04-20 20:31.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.50 [info     ] CQL_20220420203000: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003029073190967939, 'time_algorithm_update': 0.017767871332447432, 'temp_loss': -0.15255127138627028, 'temp': 0.9704613413727075, 'alpha_loss': 12.538840383116963, 'alpha': 0.6310075043934827, 'critic_loss': 761.6329061943188, 'actor_loss': 35.02551880217435, 'time_step': 0.018171200975340012, 'td_error': 79.51153661645125, 'init_value': -69.56893920898438, 'ave_value': -25.92343416550138} step=5814
2022-04-20 20:31.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:31.57 [info     ] CQL_20220420203000: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.00030188044609382136, 'time_algorithm_update': 0.017969723333392227, 'temp_loss': -0.21565672004737005, 'temp': 0.9717158673450961, 'alpha_loss': 11.353408722849617, 'alpha': 0.6098496293463902, 'critic_loss': 828.7611947756761, 'actor_loss': 37.64147656424004, 'time_step': 0.018367631393566466, 'td_error': 78.93046257749104, 'init_value': -75.91860961914062, 'ave_value': -29.000098639560175} step=6156
2022-04-20 20:31.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.03 [info     ] CQL_20220420203000: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.0002896241974412349, 'time_algorithm_update': 0.017159461277967306, 'temp_loss': -0.23558029481237047, 'temp': 0.973636621968788, 'alpha_loss': 11.243586463537829, 'alpha': 0.5903701153066423, 'critic_loss': 895.2972092656364, 'actor_loss': 40.49010971136261, 'time_step': 0.01754201713361238, 'td_error': 72.053680482236, 'init_value': -84.76688385009766, 'ave_value': -31.847206447747375} step=6498
2022-04-20 20:32.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.09 [info     ] CQL_20220420203000: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0002831450679845977, 'time_algorithm_update': 0.01649918542270772, 'temp_loss': -0.23390270940066255, 'temp': 0.9755663099693276, 'alpha_loss': 11.10693077037209, 'alpha': 0.5713903234716047, 'critic_loss': 966.3072732847337, 'actor_loss': 43.48186753925524, 'time_step': 0.01687441513552303, 'td_error': 78.30347046370943, 'init_value': -89.78943634033203, 'ave_value': -36.79981136237179} step=6840
2022-04-20 20:32.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.16 [info     ] CQL_20220420203000: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.0003039244322748909, 'time_algorithm_update': 0.018143566031205028, 'temp_loss': -0.2689090750196524, 'temp': 0.9777142583278188, 'alpha_loss': 10.931072967094288, 'alpha': 0.5532213829414189, 'critic_loss': 1035.8813137478298, 'actor_loss': 46.46829785798725, 'time_step': 0.018547321620740388, 'td_error': 80.54383049643018, 'init_value': -92.67996215820312, 'ave_value': -37.07016076239917} step=7182
2022-04-20 20:32.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.22 [info     ] CQL_20220420203000: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.0003063678741455078, 'time_algorithm_update': 0.01843764558870193, 'temp_loss': -0.3393698632335889, 'temp': 0.9805151837611059, 'alpha_loss': 11.524458549176043, 'alpha': 0.5350398473572313, 'critic_loss': 1093.4296862507424, 'actor_loss': 48.94327598705626, 'time_step': 0.01884324160235667, 'td_error': 85.41708022005088, 'init_value': -99.74813079833984, 'ave_value': -40.12287342094086} step=7524
2022-04-20 20:32.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.29 [info     ] CQL_20220420203000: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003018957829614829, 'time_algorithm_update': 0.01810989951529698, 'temp_loss': -0.37429572690866497, 'temp': 0.98373899508638, 'alpha_loss': 11.161881561167757, 'alpha': 0.5172496518196418, 'critic_loss': 1155.8522756476152, 'actor_loss': 51.74794662207888, 'time_step': 0.018512087955809477, 'td_error': 136.5245149526508, 'init_value': -105.6378402709961, 'ave_value': -41.82785149746113} step=7866
2022-04-20 20:32.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.35 [info     ] CQL_20220420203000: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003106587114389877, 'time_algorithm_update': 0.01824122423317, 'temp_loss': -0.42356661576450916, 'temp': 0.987241350070775, 'alpha_loss': 11.165893933926409, 'alpha': 0.5001789120554226, 'critic_loss': 1224.7637427257516, 'actor_loss': 54.8019751275492, 'time_step': 0.01864969451525058, 'td_error': 121.70069882132012, 'init_value': -110.5370864868164, 'ave_value': -44.38157607060295} step=8208
2022-04-20 20:32.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.42 [info     ] CQL_20220420203000: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.00030187556618138363, 'time_algorithm_update': 0.01833364698621962, 'temp_loss': -0.45395284190730395, 'temp': 0.9907655423147637, 'alpha_loss': 11.62097033840871, 'alpha': 0.48350238852333605, 'critic_loss': 1287.7495192142956, 'actor_loss': 57.39534189547712, 'time_step': 0.018734635665402774, 'td_error': 174.8146298562622, 'init_value': -119.9193115234375, 'ave_value': -47.7890087061315} step=8550
2022-04-20 20:32.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.49 [info     ] CQL_20220420203000: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003068635338231137, 'time_algorithm_update': 0.018274546366686014, 'temp_loss': -0.4883562680116, 'temp': 0.9944636115902349, 'alpha_loss': 11.198161436103241, 'alpha': 0.4672787476178498, 'critic_loss': 1358.279849403783, 'actor_loss': 60.42515987820096, 'time_step': 0.01868356738174171, 'td_error': 189.4323769591705, 'init_value': -123.34110260009766, 'ave_value': -50.5390321639207} step=8892
2022-04-20 20:32.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:32.55 [info     ] CQL_20220420203000: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.00030348872580723455, 'time_algorithm_update': 0.018305019328468723, 'temp_loss': -0.49165395427255604, 'temp': 0.9982148712141472, 'alpha_loss': 11.680891251703452, 'alpha': 0.4515363425887816, 'critic_loss': 1431.2712680749726, 'actor_loss': 63.55651469537389, 'time_step': 0.018707684606139422, 'td_error': 188.56514110086334, 'init_value': -129.8890380859375, 'ave_value': -52.73751841776006} step=9234
2022-04-20 20:32.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.02 [info     ] CQL_20220420203000: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00030388748436643365, 'time_algorithm_update': 0.018132552068833022, 'temp_loss': -0.4963676822725792, 'temp': 1.0017884690859165, 'alpha_loss': 11.513301245650354, 'alpha': 0.43634991670212553, 'critic_loss': 1504.6335327862298, 'actor_loss': 66.83300196775916, 'time_step': 0.01853272719689977, 'td_error': 200.6400678808649, 'init_value': -139.05166625976562, 'ave_value': -57.188773963569524} step=9576
2022-04-20 20:33.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.09 [info     ] CQL_20220420203000: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.000311882872330515, 'time_algorithm_update': 0.0182628575821369, 'temp_loss': -0.5048154696538957, 'temp': 1.005455053689187, 'alpha_loss': 11.08142162066454, 'alpha': 0.42200191780837654, 'critic_loss': 1577.2509851288378, 'actor_loss': 69.88376589546426, 'time_step': 0.01867690560413383, 'td_error': 208.1068560747723, 'init_value': -146.82162475585938, 'ave_value': -60.0064170653326} step=9918
2022-04-20 20:33.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.15 [info     ] CQL_20220420203000: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00030556965989676137, 'time_algorithm_update': 0.018041239844428167, 'temp_loss': -0.5286125996264449, 'temp': 1.0090700597093816, 'alpha_loss': 11.205734906837954, 'alpha': 0.4082318102581459, 'critic_loss': 1656.490167628952, 'actor_loss': 73.52586117125394, 'time_step': 0.018446626718978437, 'td_error': 275.76578564488267, 'init_value': -151.46536254882812, 'ave_value': -62.197817078162956} step=10260
2022-04-20 20:33.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.22 [info     ] CQL_20220420203000: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00030754532730370237, 'time_algorithm_update': 0.0181896512271368, 'temp_loss': -0.5850648231564732, 'temp': 1.0129830174975925, 'alpha_loss': 11.395902990597731, 'alpha': 0.3946873739100339, 'critic_loss': 1729.2943443610654, 'actor_loss': 76.48999791953996, 'time_step': 0.018596518806546752, 'td_error': 246.79381258480288, 'init_value': -161.5626220703125, 'ave_value': -65.98883717476785} step=10602
2022-04-20 20:33.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.28 [info     ] CQL_20220420203000: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.000308057020979318, 'time_algorithm_update': 0.018231219715542264, 'temp_loss': -0.4890945533628178, 'temp': 1.0165985331200718, 'alpha_loss': 9.968609645352727, 'alpha': 0.3821430353567614, 'critic_loss': 1806.0805646215963, 'actor_loss': 79.52022114134671, 'time_step': 0.018640107578701444, 'td_error': 167.03592257147798, 'init_value': -156.28219604492188, 'ave_value': -64.3368471648242} step=10944
2022-04-20 20:33.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.35 [info     ] CQL_20220420203000: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.00030633371475844356, 'time_algorithm_update': 0.01831226111852635, 'temp_loss': -0.4805564634088013, 'temp': 1.0197609666495295, 'alpha_loss': 10.44042262138679, 'alpha': 0.37059587763066876, 'critic_loss': 1882.887939453125, 'actor_loss': 83.07003978260776, 'time_step': 0.018718285170214916, 'td_error': 244.92754510525472, 'init_value': -168.72613525390625, 'ave_value': -68.94964682035618} step=11286
2022-04-20 20:33.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.42 [info     ] CQL_20220420203000: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.0003025838506152058, 'time_algorithm_update': 0.01824070277966951, 'temp_loss': -0.4991231389365524, 'temp': 1.0231939651812727, 'alpha_loss': 10.283917236049273, 'alpha': 0.358571671080171, 'critic_loss': 1967.8756874486019, 'actor_loss': 86.76689306337234, 'time_step': 0.018643457987155134, 'td_error': 246.71915061854224, 'init_value': -178.770263671875, 'ave_value': -74.80304499001116} step=11628
2022-04-20 20:33.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.48 [info     ] CQL_20220420203000: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003036964706510131, 'time_algorithm_update': 0.018125845674882856, 'temp_loss': -0.5048214631558162, 'temp': 1.0266612496292382, 'alpha_loss': 10.15232199116757, 'alpha': 0.34725724648662476, 'critic_loss': 2048.2901054516174, 'actor_loss': 90.06584113940858, 'time_step': 0.018527233112625212, 'td_error': 317.2471063985832, 'init_value': -189.37850952148438, 'ave_value': -79.44569434155216} step=11970
2022-04-20 20:33.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:33.55 [info     ] CQL_20220420203000: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.00030103900976348344, 'time_algorithm_update': 0.01763215608764113, 'temp_loss': -0.5154506518282214, 'temp': 1.030258503922245, 'alpha_loss': 10.045399156927365, 'alpha': 0.33602378757027856, 'critic_loss': 2130.3340072185674, 'actor_loss': 93.657252239205, 'time_step': 0.018030177082931788, 'td_error': 233.61895914409445, 'init_value': -192.5677032470703, 'ave_value': -80.3871333410611} step=12312
2022-04-20 20:33.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.01 [info     ] CQL_20220420203000: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0002962706381814522, 'time_algorithm_update': 0.017709808042872022, 'temp_loss': -0.4955822357047371, 'temp': 1.033764307959038, 'alpha_loss': 9.024880425971851, 'alpha': 0.325689937897593, 'critic_loss': 2219.822377344321, 'actor_loss': 97.46398274382652, 'time_step': 0.018104706591332866, 'td_error': 326.8280917777174, 'init_value': -202.94332885742188, 'ave_value': -86.45294189862302} step=12654
2022-04-20 20:34.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.08 [info     ] CQL_20220420203000: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0002990256973177369, 'time_algorithm_update': 0.017723757621140507, 'temp_loss': -0.5399012409956657, 'temp': 1.037222653453113, 'alpha_loss': 9.461005733724226, 'alpha': 0.3159040085404937, 'critic_loss': 2307.940137718156, 'actor_loss': 101.46554108112178, 'time_step': 0.01812279224395752, 'td_error': 396.51740802719365, 'init_value': -207.89126586914062, 'ave_value': -88.873767541604} step=12996
2022-04-20 20:34.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.14 [info     ] CQL_20220420203000: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.0002985063352082905, 'time_algorithm_update': 0.017642645110860902, 'temp_loss': -0.5160181527706789, 'temp': 1.0409239980212428, 'alpha_loss': 9.020260582193297, 'alpha': 0.3058966531565315, 'critic_loss': 2389.7641608701115, 'actor_loss': 105.01619292142098, 'time_step': 0.018037327548913788, 'td_error': 321.82429267556233, 'init_value': -215.9938201904297, 'ave_value': -90.06700286492571} step=13338
2022-04-20 20:34.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.20 [info     ] CQL_20220420203000: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.0002994934717814128, 'time_algorithm_update': 0.01772602678042406, 'temp_loss': -0.5160289246654302, 'temp': 1.0443726415522614, 'alpha_loss': 8.374151982759175, 'alpha': 0.29668895717252763, 'critic_loss': 2472.8010539450843, 'actor_loss': 108.44871043601232, 'time_step': 0.018120468011376453, 'td_error': 301.89833861655393, 'init_value': -219.795654296875, 'ave_value': -93.75166313092988} step=13680
2022-04-20 20:34.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.27 [info     ] CQL_20220420203000: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00029669937334562605, 'time_algorithm_update': 0.017678172267668428, 'temp_loss': -0.5051679915982729, 'temp': 1.0480238397916157, 'alpha_loss': 8.608993268152426, 'alpha': 0.28767189906354534, 'critic_loss': 2552.754455923337, 'actor_loss': 112.06599591349998, 'time_step': 0.018072450370119328, 'td_error': 335.4057366341738, 'init_value': -228.61117553710938, 'ave_value': -98.37263233140783} step=14022
2022-04-20 20:34.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.33 [info     ] CQL_20220420203000: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00030352637084604005, 'time_algorithm_update': 0.01776245741816292, 'temp_loss': -0.49902826616791085, 'temp': 1.0514842107979179, 'alpha_loss': 9.381324234064559, 'alpha': 0.27848719530983973, 'critic_loss': 2642.5599600808664, 'actor_loss': 116.24590691906667, 'time_step': 0.018167319353560956, 'td_error': 528.8392029411314, 'init_value': -240.98397827148438, 'ave_value': -103.2797838184855} step=14364
2022-04-20 20:34.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.40 [info     ] CQL_20220420203000: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.0003014098831087525, 'time_algorithm_update': 0.0177907323279576, 'temp_loss': -0.48929267821081895, 'temp': 1.0548729711805869, 'alpha_loss': 7.8925785335183845, 'alpha': 0.26944122912242396, 'critic_loss': 2745.7334198533445, 'actor_loss': 120.45324073479189, 'time_step': 0.01818923782884029, 'td_error': 365.9980911784825, 'init_value': -249.8245086669922, 'ave_value': -106.9000496350967} step=14706
2022-04-20 20:34.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.46 [info     ] CQL_20220420203000: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003057341826589484, 'time_algorithm_update': 0.017974357158817046, 'temp_loss': -0.45189404823103846, 'temp': 1.0581865003931592, 'alpha_loss': 8.056559564077366, 'alpha': 0.2616114619879695, 'critic_loss': 2831.453063607913, 'actor_loss': 124.1100967585692, 'time_step': 0.018381162693626003, 'td_error': 541.4745905153414, 'init_value': -250.0167236328125, 'ave_value': -109.17258579344363} step=15048
2022-04-20 20:34.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.53 [info     ] CQL_20220420203000: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.000303280980963456, 'time_algorithm_update': 0.0179032798399005, 'temp_loss': -0.44163112128316834, 'temp': 1.0613398583311784, 'alpha_loss': 7.649858226553041, 'alpha': 0.2533533168379326, 'critic_loss': 2924.7042114971673, 'actor_loss': 128.3278260481985, 'time_step': 0.018305685087951305, 'td_error': 486.8614675383817, 'init_value': -255.4463653564453, 'ave_value': -108.23974820517205} step=15390
2022-04-20 20:34.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:34.59 [info     ] CQL_20220420203000: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00030512349647388124, 'time_algorithm_update': 0.017924372912847507, 'temp_loss': -0.42247881951649285, 'temp': 1.0645320656006796, 'alpha_loss': 7.353959519960727, 'alpha': 0.2459783917020636, 'critic_loss': 3005.886306851928, 'actor_loss': 131.71460242020456, 'time_step': 0.018330173882824635, 'td_error': 473.94211283088697, 'init_value': -269.5731201171875, 'ave_value': -115.27069969530578} step=15732
2022-04-20 20:34.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:35.06 [info     ] CQL_20220420203000: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00030416494224503725, 'time_algorithm_update': 0.018044385296559474, 'temp_loss': -0.43732997140035645, 'temp': 1.067623035949573, 'alpha_loss': 6.900813019066526, 'alpha': 0.2388051670284299, 'critic_loss': 3083.7142976459704, 'actor_loss': 134.90260973032454, 'time_step': 0.018448016099762498, 'td_error': 587.0112909483369, 'init_value': -275.45208740234375, 'ave_value': -119.93806022458249} step=16074
2022-04-20 20:35.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:35.12 [info     ] CQL_20220420203000: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003073640734131573, 'time_algorithm_update': 0.01791322579857899, 'temp_loss': -0.4161999546841048, 'temp': 1.070883665865625, 'alpha_loss': 7.327568639788711, 'alpha': 0.2315234694390269, 'critic_loss': 3165.3098430075843, 'actor_loss': 138.80762068988287, 'time_step': 0.01832135100113718, 'td_error': 408.038107017548, 'init_value': -283.51226806640625, 'ave_value': -123.87547957919739} step=16416
2022-04-20 20:35.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:35.19 [info     ] CQL_20220420203000: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00030209097945899295, 'time_algorithm_update': 0.017929306504322073, 'temp_loss': -0.4043265173527581, 'temp': 1.0739776342235812, 'alpha_loss': 6.95846230384202, 'alpha': 0.22447797534061453, 'critic_loss': 3254.750114217836, 'actor_loss': 142.94674002217968, 'time_step': 0.01833345039546141, 'td_error': 453.26742768950686, 'init_value': -294.66009521484375, 'ave_value': -126.91310617743312} step=16758
2022-04-20 20:35.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:35.25 [info     ] CQL_20220420203000: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00030503914370174295, 'time_algorithm_update': 0.017934313991613555, 'temp_loss': -0.3482237335577694, 'temp': 1.0770861990967688, 'alpha_loss': 6.891591923975805, 'alpha': 0.2173475693715246, 'critic_loss': 3347.4895861887794, 'actor_loss': 146.95613339072779, 'time_step': 0.018338545024046422, 'td_error': 506.86661487151775, 'init_value': -301.0392150878906, 'ave_value': -131.22857675094863} step=17100
2022-04-20 20:35.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203000/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:35.26 [info     ] FQE_20220420203526: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001433312174785568, 'time_algorithm_update': 0.0020657490534954762, 'loss': 0.004829268406021847, 'time_step': 0.002275754170245435, 'init_value': -0.12367407977581024, 'ave_value': -0.07323894193731584, 'soft_opc': nan} step=166




2022-04-20 20:35.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.26 [info     ] FQE_20220420203526: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001443265432334808, 'time_algorithm_update': 0.0020847952509500892, 'loss': 0.0035934992511868925, 'time_step': 0.002295912030231522, 'init_value': -0.25170451402664185, 'ave_value': -0.15968106352128425, 'soft_opc': nan} step=332




2022-04-20 20:35.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.27 [info     ] FQE_20220420203526: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014524431113737175, 'time_algorithm_update': 0.0020706179630325503, 'loss': 0.003353015187263758, 'time_step': 0.002284160579543516, 'init_value': -0.31921347975730896, 'ave_value': -0.19099547475542838, 'soft_opc': nan} step=498




2022-04-20 20:35.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.27 [info     ] FQE_20220420203526: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014553443495049533, 'time_algorithm_update': 0.002071235553327813, 'loss': 0.003580182252160038, 'time_step': 0.0022818970392985516, 'init_value': -0.4113590717315674, 'ave_value': -0.24407667411273126, 'soft_opc': nan} step=664




2022-04-20 20:35.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.28 [info     ] FQE_20220420203526: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001430511474609375, 'time_algorithm_update': 0.002008646367544151, 'loss': 0.003485922604877816, 'time_step': 0.002217749515211726, 'init_value': -0.4942273795604706, 'ave_value': -0.3020979591225786, 'soft_opc': nan} step=830




2022-04-20 20:35.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.28 [info     ] FQE_20220420203526: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014580588742911098, 'time_algorithm_update': 0.0021179354334452064, 'loss': 0.003542671605652894, 'time_step': 0.0023327290293682053, 'init_value': -0.5088479518890381, 'ave_value': -0.30286057592864646, 'soft_opc': nan} step=996




2022-04-20 20:35.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.29 [info     ] FQE_20220420203526: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014799186982304217, 'time_algorithm_update': 0.0021332674715892376, 'loss': 0.0036416214887972876, 'time_step': 0.002352947212127318, 'init_value': -0.5760972499847412, 'ave_value': -0.35227333506367897, 'soft_opc': nan} step=1162




2022-04-20 20:35.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.30 [info     ] FQE_20220420203526: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00020796563251909003, 'time_algorithm_update': 0.0054908531257905156, 'loss': 0.003659931682639318, 'time_step': 0.005765496966350509, 'init_value': -0.6792360544204712, 'ave_value': -0.4282375565180471, 'soft_opc': nan} step=1328




2022-04-20 20:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.30 [info     ] FQE_20220420203526: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014686297221356128, 'time_algorithm_update': 0.0020511624324752622, 'loss': 0.0035875455380967223, 'time_step': 0.0022708306829613374, 'init_value': -0.6784340143203735, 'ave_value': -0.4169668978155666, 'soft_opc': nan} step=1494




2022-04-20 20:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.30 [info     ] FQE_20220420203526: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014202422406300004, 'time_algorithm_update': 0.0020759191857763083, 'loss': 0.003567273250966142, 'time_step': 0.0022844621934086443, 'init_value': -0.7355326414108276, 'ave_value': -0.4555634610036069, 'soft_opc': nan} step=1660




2022-04-20 20:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.31 [info     ] FQE_20220420203526: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001472076737737081, 'time_algorithm_update': 0.0020824828779841043, 'loss': 0.0036250805104970484, 'time_step': 0.0022980434348784298, 'init_value': -0.7914599180221558, 'ave_value': -0.48788133959562796, 'soft_opc': nan} step=1826




2022-04-20 20:35.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.31 [info     ] FQE_20220420203526: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014796314469302994, 'time_algorithm_update': 0.0022076454507299215, 'loss': 0.0034933113969907344, 'time_step': 0.002425580139619758, 'init_value': -0.8314365148544312, 'ave_value': -0.5065205942285624, 'soft_opc': nan} step=1992




2022-04-20 20:35.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.32 [info     ] FQE_20220420203526: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001475092876388366, 'time_algorithm_update': 0.002080035496907062, 'loss': 0.0035915891261187843, 'time_step': 0.0022935220994145036, 'init_value': -0.9108412265777588, 'ave_value': -0.5544374376598343, 'soft_opc': nan} step=2158




2022-04-20 20:35.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.32 [info     ] FQE_20220420203526: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014564933547054428, 'time_algorithm_update': 0.0020186340952494056, 'loss': 0.0035999715482501918, 'time_step': 0.002234701650688447, 'init_value': -0.9600166082382202, 'ave_value': -0.5863421024234445, 'soft_opc': nan} step=2324




2022-04-20 20:35.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.33 [info     ] FQE_20220420203526: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014750067009983292, 'time_algorithm_update': 0.0020843442664088972, 'loss': 0.0035853681080211626, 'time_step': 0.0022998559905822017, 'init_value': -1.0201988220214844, 'ave_value': -0.6312353663414986, 'soft_opc': nan} step=2490




2022-04-20 20:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.33 [info     ] FQE_20220420203526: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000150353075510048, 'time_algorithm_update': 0.002103977892772261, 'loss': 0.0035918481880798936, 'time_step': 0.0023261308670043945, 'init_value': -1.0564556121826172, 'ave_value': -0.6428931164160908, 'soft_opc': nan} step=2656




2022-04-20 20:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.33 [info     ] FQE_20220420203526: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014298220714890813, 'time_algorithm_update': 0.0020329880427165203, 'loss': 0.0036620637689470255, 'time_step': 0.0022468294005796134, 'init_value': -1.132840871810913, 'ave_value': -0.6854247936584592, 'soft_opc': nan} step=2822




2022-04-20 20:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.34 [info     ] FQE_20220420203526: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014964212854224514, 'time_algorithm_update': 0.0022100928318069643, 'loss': 0.0036335822877878077, 'time_step': 0.002427135605409921, 'init_value': -1.1608631610870361, 'ave_value': -0.7037938804441207, 'soft_opc': nan} step=2988




2022-04-20 20:35.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.34 [info     ] FQE_20220420203526: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015212110726230116, 'time_algorithm_update': 0.0021300157868718527, 'loss': 0.0037017152188850723, 'time_step': 0.002350912036665951, 'init_value': -1.2320659160614014, 'ave_value': -0.7532163096340121, 'soft_opc': nan} step=3154




2022-04-20 20:35.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.35 [info     ] FQE_20220420203526: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014524861990687358, 'time_algorithm_update': 0.002071939319013113, 'loss': 0.0037741309953348554, 'time_step': 0.00229013253407306, 'init_value': -1.307471513748169, 'ave_value': -0.8087431401433842, 'soft_opc': nan} step=3320




2022-04-20 20:35.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.35 [info     ] FQE_20220420203526: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015051393623811653, 'time_algorithm_update': 0.0021849425442247504, 'loss': 0.00394944647601974, 'time_step': 0.0024062739797385342, 'init_value': -1.3667904138565063, 'ave_value': -0.8440062810037587, 'soft_opc': nan} step=3486




2022-04-20 20:35.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.36 [info     ] FQE_20220420203526: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014564789921404366, 'time_algorithm_update': 0.002085807811783021, 'loss': 0.004068051917192313, 'time_step': 0.002299792795296175, 'init_value': -1.4835453033447266, 'ave_value': -0.9294072407625011, 'soft_opc': nan} step=3652




2022-04-20 20:35.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.36 [info     ] FQE_20220420203526: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001499624137418816, 'time_algorithm_update': 0.0021129487508750825, 'loss': 0.004382821148491453, 'time_step': 0.0023316288568887367, 'init_value': -1.5419061183929443, 'ave_value': -0.973123350736123, 'soft_opc': nan} step=3818




2022-04-20 20:35.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.36 [info     ] FQE_20220420203526: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001516615051821054, 'time_algorithm_update': 0.002129969826663833, 'loss': 0.004351016451711261, 'time_step': 0.0023489271301821053, 'init_value': -1.574971318244934, 'ave_value': -0.9772268057968635, 'soft_opc': nan} step=3984




2022-04-20 20:35.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.37 [info     ] FQE_20220420203526: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.000147315393011254, 'time_algorithm_update': 0.0020429154476487494, 'loss': 0.004578007948977581, 'time_step': 0.0022611603679427183, 'init_value': -1.6332818269729614, 'ave_value': -1.008470596822801, 'soft_opc': nan} step=4150




2022-04-20 20:35.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.37 [info     ] FQE_20220420203526: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001491968890270555, 'time_algorithm_update': 0.0021343532815037004, 'loss': 0.004772259698092309, 'time_step': 0.002354781311678599, 'init_value': -1.758162498474121, 'ave_value': -1.092252850584132, 'soft_opc': nan} step=4316




2022-04-20 20:35.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.38 [info     ] FQE_20220420203526: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015114876161138695, 'time_algorithm_update': 0.002079568713544363, 'loss': 0.004902864891912295, 'time_step': 0.0022977231496787934, 'init_value': -1.7946197986602783, 'ave_value': -1.123902167531301, 'soft_opc': nan} step=4482




2022-04-20 20:35.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.38 [info     ] FQE_20220420203526: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014703388673713408, 'time_algorithm_update': 0.0021646252597670957, 'loss': 0.005343979689522635, 'time_step': 0.0023818087865071125, 'init_value': -1.8817410469055176, 'ave_value': -1.1906951986240677, 'soft_opc': nan} step=4648




2022-04-20 20:35.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.39 [info     ] FQE_20220420203526: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014845290815973855, 'time_algorithm_update': 0.002152247601244823, 'loss': 0.005509602854095669, 'time_step': 0.0023721916129790157, 'init_value': -2.0057990550994873, 'ave_value': -1.2511557323044284, 'soft_opc': nan} step=4814




2022-04-20 20:35.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.39 [info     ] FQE_20220420203526: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001499250710728657, 'time_algorithm_update': 0.0021400308034506188, 'loss': 0.0059316326241311895, 'time_step': 0.0023616265101605153, 'init_value': -2.0582847595214844, 'ave_value': -1.2876434605224645, 'soft_opc': nan} step=4980




2022-04-20 20:35.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.39 [info     ] FQE_20220420203526: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001501218382134495, 'time_algorithm_update': 0.0021507811833576984, 'loss': 0.006033266891012843, 'time_step': 0.002369274576026273, 'init_value': -2.146721124649048, 'ave_value': -1.3620528391595963, 'soft_opc': nan} step=5146




2022-04-20 20:35.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.40 [info     ] FQE_20220420203526: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014796888971903237, 'time_algorithm_update': 0.00218214758907456, 'loss': 0.0061992691249126595, 'time_step': 0.002405501273741205, 'init_value': -2.2735347747802734, 'ave_value': -1.4272204273736926, 'soft_opc': nan} step=5312




2022-04-20 20:35.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.40 [info     ] FQE_20220420203526: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014921125159206162, 'time_algorithm_update': 0.002165204071136842, 'loss': 0.006775966122415455, 'time_step': 0.0023858346134783275, 'init_value': -2.281877279281616, 'ave_value': -1.429739940731979, 'soft_opc': nan} step=5478




2022-04-20 20:35.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.41 [info     ] FQE_20220420203526: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014988629214734915, 'time_algorithm_update': 0.002195625420076301, 'loss': 0.0070629682404505295, 'time_step': 0.002415022218083761, 'init_value': -2.4406898021698, 'ave_value': -1.5643692322931178, 'soft_opc': nan} step=5644




2022-04-20 20:35.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.41 [info     ] FQE_20220420203526: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014596243938767766, 'time_algorithm_update': 0.002116897019995264, 'loss': 0.007251526917056012, 'time_step': 0.0023299699806305298, 'init_value': -2.5601248741149902, 'ave_value': -1.6293747573549784, 'soft_opc': nan} step=5810




2022-04-20 20:35.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.42 [info     ] FQE_20220420203526: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014609313872923334, 'time_algorithm_update': 0.0020684477794601255, 'loss': 0.007752469951898443, 'time_step': 0.002282834914793451, 'init_value': -2.6052913665771484, 'ave_value': -1.6437252663084314, 'soft_opc': nan} step=5976




2022-04-20 20:35.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.42 [info     ] FQE_20220420203526: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014856206365378507, 'time_algorithm_update': 0.002108515027057694, 'loss': 0.008150126829922917, 'time_step': 0.0023275513246834995, 'init_value': -2.6630537509918213, 'ave_value': -1.670261430465994, 'soft_opc': nan} step=6142




2022-04-20 20:35.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.43 [info     ] FQE_20220420203526: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014572689332157732, 'time_algorithm_update': 0.002126429454389825, 'loss': 0.008567583105521253, 'time_step': 0.0023410018668117293, 'init_value': -2.7345499992370605, 'ave_value': -1.7115935147865802, 'soft_opc': nan} step=6308




2022-04-20 20:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.43 [info     ] FQE_20220420203526: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014802490372255625, 'time_algorithm_update': 0.0021639186215687946, 'loss': 0.008433680801865566, 'time_step': 0.002380878092294716, 'init_value': -2.8371119499206543, 'ave_value': -1.7741450717748217, 'soft_opc': nan} step=6474




2022-04-20 20:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.43 [info     ] FQE_20220420203526: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014887660382741904, 'time_algorithm_update': 0.0021113717412374108, 'loss': 0.009478549056417164, 'time_step': 0.0023297344345644296, 'init_value': -2.928354263305664, 'ave_value': -1.8216317983528785, 'soft_opc': nan} step=6640




2022-04-20 20:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.44 [info     ] FQE_20220420203526: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015085289277226092, 'time_algorithm_update': 0.002143552504390119, 'loss': 0.009882706682427776, 'time_step': 0.0023631618683596692, 'init_value': -2.977382183074951, 'ave_value': -1.8461346587879366, 'soft_opc': nan} step=6806




2022-04-20 20:35.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.44 [info     ] FQE_20220420203526: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014879186469388296, 'time_algorithm_update': 0.0021078012075768896, 'loss': 0.010243386278456324, 'time_step': 0.0023237552987523825, 'init_value': -3.078988552093506, 'ave_value': -1.9167025732791034, 'soft_opc': nan} step=6972




2022-04-20 20:35.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.45 [info     ] FQE_20220420203526: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014415993747940982, 'time_algorithm_update': 0.0021374800119055323, 'loss': 0.010627286572497428, 'time_step': 0.0023541235062013188, 'init_value': -3.213839530944824, 'ave_value': -2.008809171538582, 'soft_opc': nan} step=7138




2022-04-20 20:35.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.45 [info     ] FQE_20220420203526: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014960765838623047, 'time_algorithm_update': 0.0021223907011101045, 'loss': 0.01095605985800096, 'time_step': 0.0023428905441100338, 'init_value': -3.2570152282714844, 'ave_value': -2.057393036393432, 'soft_opc': nan} step=7304




2022-04-20 20:35.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.46 [info     ] FQE_20220420203526: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014800910490104952, 'time_algorithm_update': 0.002051782895283527, 'loss': 0.010909294677303588, 'time_step': 0.0022690511611570797, 'init_value': -3.3402888774871826, 'ave_value': -2.1053606073111966, 'soft_opc': nan} step=7470




2022-04-20 20:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.46 [info     ] FQE_20220420203526: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014966510864625494, 'time_algorithm_update': 0.0021425370710441865, 'loss': 0.011473044694866985, 'time_step': 0.002359680382602186, 'init_value': -3.3932294845581055, 'ave_value': -2.1187952827821643, 'soft_opc': nan} step=7636




2022-04-20 20:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.46 [info     ] FQE_20220420203526: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014539224555693478, 'time_algorithm_update': 0.0021215533635702477, 'loss': 0.01175584204506174, 'time_step': 0.0023344999336334595, 'init_value': -3.460491418838501, 'ave_value': -2.165893315176924, 'soft_opc': nan} step=7802




2022-04-20 20:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.47 [info     ] FQE_20220420203526: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001460644135992211, 'time_algorithm_update': 0.0020834336797875093, 'loss': 0.01236611203899248, 'time_step': 0.002297220459903579, 'init_value': -3.5411388874053955, 'ave_value': -2.21502284354589, 'soft_opc': nan} step=7968




2022-04-20 20:35.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.47 [info     ] FQE_20220420203526: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014978575419230633, 'time_algorithm_update': 0.0021331784236861997, 'loss': 0.012884929410114616, 'time_step': 0.002353054931364864, 'init_value': -3.634503126144409, 'ave_value': -2.2833047677418805, 'soft_opc': nan} step=8134




2022-04-20 20:35.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:35.48 [info     ] FQE_20220420203526: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015031142407153026, 'time_algorithm_update': 0.0020931011222931274, 'loss': 0.013444016526273665, 'time_step': 0.002313859491463167, 'init_value': -3.6569316387176514, 'ave_value': -2.280043157892352, 'soft_opc': nan} step=8300




2022-04-20 20:35.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203526/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:35.48 [info     ] Directory is created at d3rlpy_logs/FQE_20220420203548
2022-04-20 20:35.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:35.48 [debug    ] Building models...
2022-04-20 20:35.48 [debug    ] Models have been built.
2022-04-20 20:35.48 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420203548/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:35.49 [info     ] FQE_20220420203548: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016151333964148232, 'time_algorithm_update': 0.0021252784618111544, 'loss': 0.025962578468457904, 'time_step': 0.0023585038129673446, 'init_value': -1.1175627708435059, 'ave_value': -1.1305597525324906, 'soft_opc': nan} step=344




2022-04-20 20:35.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.50 [info     ] FQE_20220420203548: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016121046487675158, 'time_algorithm_update': 0.002068758010864258, 'loss': 0.023996770587684804, 'time_step': 0.0023021898990453677, 'init_value': -1.9039256572723389, 'ave_value': -1.9299195811018213, 'soft_opc': nan} step=688




2022-04-20 20:35.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.51 [info     ] FQE_20220420203548: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016063728997873705, 'time_algorithm_update': 0.0021062035893284997, 'loss': 0.02718552035214596, 'time_step': 0.002337769713512687, 'init_value': -2.8985915184020996, 'ave_value': -2.9492313355341686, 'soft_opc': nan} step=1032




2022-04-20 20:35.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.52 [info     ] FQE_20220420203548: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015900162763373796, 'time_algorithm_update': 0.0021004725334256196, 'loss': 0.030586755747885205, 'time_step': 0.0023315133050430654, 'init_value': -3.7327239513397217, 'ave_value': -3.8013641833990546, 'soft_opc': nan} step=1376




2022-04-20 20:35.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.53 [info     ] FQE_20220420203548: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016065600306488747, 'time_algorithm_update': 0.0020914895589961564, 'loss': 0.03715308709882286, 'time_step': 0.0023217457671498142, 'init_value': -4.773097991943359, 'ave_value': -4.87052630513638, 'soft_opc': nan} step=1720




2022-04-20 20:35.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.53 [info     ] FQE_20220420203548: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016089650087578353, 'time_algorithm_update': 0.002040140850599422, 'loss': 0.04293625007199427, 'time_step': 0.002273924128953801, 'init_value': -5.316792964935303, 'ave_value': -5.449697974136284, 'soft_opc': nan} step=2064




2022-04-20 20:35.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.54 [info     ] FQE_20220420203548: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001608015492905018, 'time_algorithm_update': 0.0020715587360914363, 'loss': 0.052464595400230137, 'time_step': 0.002307704714841621, 'init_value': -6.3028669357299805, 'ave_value': -6.5000150756256, 'soft_opc': nan} step=2408




2022-04-20 20:35.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.55 [info     ] FQE_20220420203548: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016295494035232898, 'time_algorithm_update': 0.0021796718586322874, 'loss': 0.06228191166206501, 'time_step': 0.0024154872395271477, 'init_value': -6.733060359954834, 'ave_value': -7.007464396631396, 'soft_opc': nan} step=2752




2022-04-20 20:35.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.56 [info     ] FQE_20220420203548: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015981114187905955, 'time_algorithm_update': 0.002096692490023236, 'loss': 0.07434358442104747, 'time_step': 0.0023266132487807165, 'init_value': -7.200597763061523, 'ave_value': -7.580844295159117, 'soft_opc': nan} step=3096




2022-04-20 20:35.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.57 [info     ] FQE_20220420203548: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001622119615244311, 'time_algorithm_update': 0.0021419843962026197, 'loss': 0.08615563398357047, 'time_step': 0.002380407826845036, 'init_value': -7.820025444030762, 'ave_value': -8.34990582311744, 'soft_opc': nan} step=3440




2022-04-20 20:35.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.58 [info     ] FQE_20220420203548: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015825864880584007, 'time_algorithm_update': 0.0020691073218057324, 'loss': 0.09761186510725163, 'time_step': 0.0023012272147245184, 'init_value': -8.294048309326172, 'ave_value': -8.91211461363075, 'soft_opc': nan} step=3784




2022-04-20 20:35.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:35.59 [info     ] FQE_20220420203548: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001638337623241336, 'time_algorithm_update': 0.0021698953107345937, 'loss': 0.11046350748971277, 'time_step': 0.0024081316105155058, 'init_value': -8.936262130737305, 'ave_value': -9.740804519403625, 'soft_opc': nan} step=4128




2022-04-20 20:35.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.00 [info     ] FQE_20220420203548: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016377069229303406, 'time_algorithm_update': 0.0021387449530668036, 'loss': 0.12123547853945299, 'time_step': 0.0023742844892102617, 'init_value': -9.306964874267578, 'ave_value': -10.330737795083373, 'soft_opc': nan} step=4472




2022-04-20 20:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.00 [info     ] FQE_20220420203548: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016104828479678133, 'time_algorithm_update': 0.002080013585645099, 'loss': 0.13412511570725677, 'time_step': 0.002312633880349093, 'init_value': -9.654400825500488, 'ave_value': -10.987994716893713, 'soft_opc': nan} step=4816




2022-04-20 20:36.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.01 [info     ] FQE_20220420203548: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016100739323815635, 'time_algorithm_update': 0.0021340985630833825, 'loss': 0.14366543607049903, 'time_step': 0.002365853897360868, 'init_value': -9.869482040405273, 'ave_value': -11.391634340705098, 'soft_opc': nan} step=5160




2022-04-20 20:36.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.02 [info     ] FQE_20220420203548: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016147729962371116, 'time_algorithm_update': 0.002167799445085747, 'loss': 0.15304807325023725, 'time_step': 0.0023995658685994703, 'init_value': -10.199972152709961, 'ave_value': -12.095491227843203, 'soft_opc': nan} step=5504




2022-04-20 20:36.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.03 [info     ] FQE_20220420203548: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016250790551651355, 'time_algorithm_update': 0.0021531831386477447, 'loss': 0.1662782450412335, 'time_step': 0.00238648957984392, 'init_value': -10.065789222717285, 'ave_value': -12.32418916593893, 'soft_opc': nan} step=5848




2022-04-20 20:36.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.04 [info     ] FQE_20220420203548: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016341514365617618, 'time_algorithm_update': 0.002131236153979634, 'loss': 0.17344818043869076, 'time_step': 0.002364178036534509, 'init_value': -10.347430229187012, 'ave_value': -12.900361221159498, 'soft_opc': nan} step=6192




2022-04-20 20:36.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.05 [info     ] FQE_20220420203548: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016159235044967298, 'time_algorithm_update': 0.0021221762479737747, 'loss': 0.18308735610700624, 'time_step': 0.0023553281329398933, 'init_value': -10.578166007995605, 'ave_value': -13.611128252294955, 'soft_opc': nan} step=6536




2022-04-20 20:36.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.06 [info     ] FQE_20220420203548: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016436050104540447, 'time_algorithm_update': 0.0021590264730675275, 'loss': 0.19006644631194514, 'time_step': 0.0023963298908499784, 'init_value': -10.743270874023438, 'ave_value': -14.03921462542436, 'soft_opc': nan} step=6880




2022-04-20 20:36.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.07 [info     ] FQE_20220420203548: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016172542128452036, 'time_algorithm_update': 0.0021504066711248355, 'loss': 0.20301235748216684, 'time_step': 0.002382287452387255, 'init_value': -10.892078399658203, 'ave_value': -14.475837755673103, 'soft_opc': nan} step=7224




2022-04-20 20:36.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.08 [info     ] FQE_20220420203548: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016231938850047977, 'time_algorithm_update': 0.002092931159707003, 'loss': 0.20965529417315887, 'time_step': 0.002326618793398835, 'init_value': -10.877756118774414, 'ave_value': -14.666127311465171, 'soft_opc': nan} step=7568




2022-04-20 20:36.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.08 [info     ] FQE_20220420203548: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016287315723507903, 'time_algorithm_update': 0.0021726787090301514, 'loss': 0.21946377587656296, 'time_step': 0.002404185228569563, 'init_value': -11.220702171325684, 'ave_value': -15.288244708741571, 'soft_opc': nan} step=7912




2022-04-20 20:36.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.09 [info     ] FQE_20220420203548: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016391069390052972, 'time_algorithm_update': 0.0021202390970185745, 'loss': 0.2293016372010285, 'time_step': 0.0023548492165498956, 'init_value': -11.162482261657715, 'ave_value': -15.468786700580035, 'soft_opc': nan} step=8256




2022-04-20 20:36.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.10 [info     ] FQE_20220420203548: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016104689864225165, 'time_algorithm_update': 0.0021135862483534704, 'loss': 0.23289466915154006, 'time_step': 0.002348039039345675, 'init_value': -11.07905387878418, 'ave_value': -15.61601974349301, 'soft_opc': nan} step=8600




2022-04-20 20:36.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.11 [info     ] FQE_20220420203548: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016081610391306323, 'time_algorithm_update': 0.0021378612795541455, 'loss': 0.2410112097805236, 'time_step': 0.002370302760323813, 'init_value': -11.453929901123047, 'ave_value': -16.20812742146286, 'soft_opc': nan} step=8944




2022-04-20 20:36.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.12 [info     ] FQE_20220420203548: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016065115152403365, 'time_algorithm_update': 0.0021220480286797813, 'loss': 0.2569229577797963, 'time_step': 0.0023522009683209794, 'init_value': -11.483699798583984, 'ave_value': -16.399321577624157, 'soft_opc': nan} step=9288




2022-04-20 20:36.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.13 [info     ] FQE_20220420203548: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016063659690147223, 'time_algorithm_update': 0.002119372057360272, 'loss': 0.2661173831844746, 'time_step': 0.002350612435229989, 'init_value': -11.476310729980469, 'ave_value': -16.725342941391574, 'soft_opc': nan} step=9632




2022-04-20 20:36.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.14 [info     ] FQE_20220420203548: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016261810480162154, 'time_algorithm_update': 0.002126471940861192, 'loss': 0.28074236722598067, 'time_step': 0.002362714257351188, 'init_value': -11.6181640625, 'ave_value': -17.20573436572208, 'soft_opc': nan} step=9976




2022-04-20 20:36.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.15 [info     ] FQE_20220420203548: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016147660654644634, 'time_algorithm_update': 0.0021027936491855356, 'loss': 0.2949079682801439, 'time_step': 0.002334314030270244, 'init_value': -11.725552558898926, 'ave_value': -17.626872099895735, 'soft_opc': nan} step=10320




2022-04-20 20:36.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.16 [info     ] FQE_20220420203548: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001652102137720862, 'time_algorithm_update': 0.002096223969792211, 'loss': 0.30546847299326124, 'time_step': 0.0023322833138842915, 'init_value': -11.469409942626953, 'ave_value': -17.60389194547593, 'soft_opc': nan} step=10664




2022-04-20 20:36.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.16 [info     ] FQE_20220420203548: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001614391803741455, 'time_algorithm_update': 0.0021058168522147245, 'loss': 0.31599103392448363, 'time_step': 0.0023403507332469143, 'init_value': -11.507158279418945, 'ave_value': -17.975580626135475, 'soft_opc': nan} step=11008




2022-04-20 20:36.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.17 [info     ] FQE_20220420203548: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016025817671487497, 'time_algorithm_update': 0.0021196319613345834, 'loss': 0.3280486118259601, 'time_step': 0.002353253059608992, 'init_value': -11.593124389648438, 'ave_value': -18.242559165567965, 'soft_opc': nan} step=11352




2022-04-20 20:36.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.18 [info     ] FQE_20220420203548: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001605347145435422, 'time_algorithm_update': 0.0020841990792474082, 'loss': 0.33976826070656263, 'time_step': 0.0023148655891418457, 'init_value': -11.482290267944336, 'ave_value': -18.345043361160133, 'soft_opc': nan} step=11696




2022-04-20 20:36.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.19 [info     ] FQE_20220420203548: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001605513483978981, 'time_algorithm_update': 0.0021259313405946243, 'loss': 0.35293029373251766, 'time_step': 0.0023582522259202113, 'init_value': -11.75067138671875, 'ave_value': -18.9561473620099, 'soft_opc': nan} step=12040




2022-04-20 20:36.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.20 [info     ] FQE_20220420203548: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.000162089979925821, 'time_algorithm_update': 0.002109413230141928, 'loss': 0.3756299410502665, 'time_step': 0.002346331296965133, 'init_value': -12.04623794555664, 'ave_value': -19.54661767525716, 'soft_opc': nan} step=12384




2022-04-20 20:36.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.21 [info     ] FQE_20220420203548: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016175245129784873, 'time_algorithm_update': 0.0021009493705838227, 'loss': 0.39352594487109155, 'time_step': 0.002336651086807251, 'init_value': -12.225196838378906, 'ave_value': -19.984854053296484, 'soft_opc': nan} step=12728




2022-04-20 20:36.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.22 [info     ] FQE_20220420203548: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016731855481169944, 'time_algorithm_update': 0.0021154346854187723, 'loss': 0.4057458197795461, 'time_step': 0.0023579853911732517, 'init_value': -12.391965866088867, 'ave_value': -20.524570727375178, 'soft_opc': nan} step=13072




2022-04-20 20:36.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.23 [info     ] FQE_20220420203548: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016248642012130384, 'time_algorithm_update': 0.0020713320998258368, 'loss': 0.4244467449309521, 'time_step': 0.002306798169779223, 'init_value': -12.521669387817383, 'ave_value': -20.895657511498477, 'soft_opc': nan} step=13416




2022-04-20 20:36.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.24 [info     ] FQE_20220420203548: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016456495883852937, 'time_algorithm_update': 0.0021813962348671847, 'loss': 0.43783611331005085, 'time_step': 0.002423815255941347, 'init_value': -12.521002769470215, 'ave_value': -21.062225419685646, 'soft_opc': nan} step=13760




2022-04-20 20:36.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.24 [info     ] FQE_20220420203548: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.000163024248078812, 'time_algorithm_update': 0.002086183359456617, 'loss': 0.45769207293254344, 'time_step': 0.002320139214049938, 'init_value': -12.479447364807129, 'ave_value': -21.322925831310386, 'soft_opc': nan} step=14104




2022-04-20 20:36.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.25 [info     ] FQE_20220420203548: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016227156616920648, 'time_algorithm_update': 0.0020796088285224383, 'loss': 0.4779036823676379, 'time_step': 0.002312543780304665, 'init_value': -13.054474830627441, 'ave_value': -22.012153055377908, 'soft_opc': nan} step=14448




2022-04-20 20:36.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.26 [info     ] FQE_20220420203548: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016010500663934753, 'time_algorithm_update': 0.0020576805569404778, 'loss': 0.49847536192405534, 'time_step': 0.0022886014261911084, 'init_value': -13.332319259643555, 'ave_value': -22.60987195939094, 'soft_opc': nan} step=14792




2022-04-20 20:36.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.27 [info     ] FQE_20220420203548: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016160066737685094, 'time_algorithm_update': 0.002130405154339103, 'loss': 0.5246615211024533, 'time_step': 0.0023630656475244565, 'init_value': -13.364843368530273, 'ave_value': -22.692048594543525, 'soft_opc': nan} step=15136




2022-04-20 20:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.28 [info     ] FQE_20220420203548: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016429812409156976, 'time_algorithm_update': 0.002129685740138209, 'loss': 0.5363684129348935, 'time_step': 0.0023663217245146286, 'init_value': -13.767608642578125, 'ave_value': -23.23211076563006, 'soft_opc': nan} step=15480




2022-04-20 20:36.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.29 [info     ] FQE_20220420203548: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016003223352654037, 'time_algorithm_update': 0.0020986691463825316, 'loss': 0.5486890725726479, 'time_step': 0.0023321024207181708, 'init_value': -13.92831039428711, 'ave_value': -23.555919242334795, 'soft_opc': nan} step=15824




2022-04-20 20:36.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.30 [info     ] FQE_20220420203548: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016264860020127406, 'time_algorithm_update': 0.00208835199821827, 'loss': 0.5694060028844707, 'time_step': 0.002323652422705362, 'init_value': -14.05256462097168, 'ave_value': -23.56891464695737, 'soft_opc': nan} step=16168




2022-04-20 20:36.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.31 [info     ] FQE_20220420203548: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016100254169730253, 'time_algorithm_update': 0.002097393191137979, 'loss': 0.5764940420035706, 'time_step': 0.002329596253328545, 'init_value': -14.240233421325684, 'ave_value': -23.832327799673553, 'soft_opc': nan} step=16512




2022-04-20 20:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.31 [info     ] FQE_20220420203548: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016196522601815157, 'time_algorithm_update': 0.0021195120589677678, 'loss': 0.6035119973794493, 'time_step': 0.0023520859174950177, 'init_value': -14.64870548248291, 'ave_value': -24.320186157618558, 'soft_opc': nan} step=16856




2022-04-20 20:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:36.32 [info     ] FQE_20220420203548: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016479713972224745, 'time_algorithm_update': 0.0022009936876075213, 'loss': 0.6045918490623968, 'time_step': 0.002439301374346711, 'init_value': -14.668191909790039, 'ave_value': -24.28381642460286, 'soft_opc': nan} step=17200




2022-04-20 20:36.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420203548/model_17200.pt
search iteration:  38
using hyper params:  [0.0008288831520532805, 0.003934260713329634, 4.6195344911552404e-05, 7]
2022-04-20 20:36.32 [debug    ] RoundIterator is selected.
2022-04-20 20:36.32 [info     ] Directory is created at d3rlpy_logs/CQL_20220420203632
2022-04-20 20:36.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:36.32 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:36.32 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420203632/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0008288831520532805, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 

Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:36.39 [info     ] CQL_20220420203632: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00038092805628190963, 'time_algorithm_update': 0.01827642722436559, 'temp_loss': 4.513249695649621, 'temp': 0.9923488719072956, 'alpha_loss': -18.13695978421217, 'alpha': 1.0174714354743735, 'critic_loss': 64.8095612665366, 'actor_loss': 5.488517332669587, 'time_step': 0.018757770633139804, 'td_error': 4.022080278070404, 'init_value': -12.468806266784668, 'ave_value': -9.047140506629471} step=342
2022-04-20 20:36.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:36.46 [info     ] CQL_20220420203632: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003766358247277332, 'time_algorithm_update': 0.017937584927207546, 'temp_loss': 4.317378963643347, 'temp': 0.9770878171711638, 'alpha_loss': -8.820915862133628, 'alpha': 1.0438900621313798, 'critic_loss': 32.09615379467345, 'actor_loss': 12.961177561018202, 'time_step': 0.018415626726652447, 'td_error': 6.912938510966391, 'init_value': -24.54899024963379, 'ave_value': -15.741412562672888} step=684
2022-04-20 20:36.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:36.52 [info     ] CQL_20220420203632: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003801981608072917, 'time_algorithm_update': 0.018028775153801455, 'temp_loss': 3.694012352597644, 'temp': 0.9630073908476802, 'alpha_loss': -5.437237776510897, 'alpha': 1.0620064637814348, 'critic_loss': 41.29647338041785, 'actor_loss': 20.655907313028973, 'time_step': 0.018511749847590575, 'td_error': 8.528709989810695, 'init_value': -34.875450134277344, 'ave_value': -22.144869135010417} step=1026
2022-04-20 20:36.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:36.59 [info     ] CQL_20220420203632: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003835520549127233, 'time_algorithm_update': 0.01799177635482877, 'temp_loss': 3.2565600391019855, 'temp': 0.9501541347531547, 'alpha_loss': -3.4503213829464383, 'alpha': 1.077137703784028, 'critic_loss': 56.65622775875337, 'actor_loss': 27.81135778817517, 'time_step': 0.018478516249628794, 'td_error': 12.288940985178387, 'init_value': -44.31935501098633, 'ave_value': -27.812153916091802} step=1368
2022-04-20 20:36.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.05 [info     ] CQL_20220420203632: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0003880157805325692, 'time_algorithm_update': 0.018001366079899304, 'temp_loss': 2.8740346027396577, 'temp': 0.9380810180602716, 'alpha_loss': -1.7191937994797393, 'alpha': 1.0880070046374672, 'critic_loss': 75.70728032073082, 'actor_loss': 34.3992864653381, 'time_step': 0.01849092377556695, 'td_error': 16.396706663024645, 'init_value': -53.422950744628906, 'ave_value': -33.530725124336044} step=1710
2022-04-20 20:37.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.12 [info     ] CQL_20220420203632: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00037780282093070406, 'time_algorithm_update': 0.01818795859465125, 'temp_loss': 2.5463483835521497, 'temp': 0.926624931264342, 'alpha_loss': -0.19238364435623437, 'alpha': 1.0929985893400092, 'critic_loss': 95.79112638506973, 'actor_loss': 40.26588455557126, 'time_step': 0.018667151356301113, 'td_error': 20.503492875040628, 'init_value': -61.59401321411133, 'ave_value': -38.33912626775401} step=2052
2022-04-20 20:37.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.19 [info     ] CQL_20220420203632: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.00038076632204111556, 'time_algorithm_update': 0.018188380358511943, 'temp_loss': 2.3215456754840607, 'temp': 0.9155811188165207, 'alpha_loss': 1.0792648664604851, 'alpha': 1.0900282744775738, 'critic_loss': 116.66664877551341, 'actor_loss': 45.661246561864665, 'time_step': 0.018671462410374692, 'td_error': 25.54424525856694, 'init_value': -69.57835388183594, 'ave_value': -42.52504380365683} step=2394
2022-04-20 20:37.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.25 [info     ] CQL_20220420203632: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.0003803515294839067, 'time_algorithm_update': 0.018119527582536665, 'temp_loss': 2.0785289514831633, 'temp': 0.9048202030839976, 'alpha_loss': 2.2184041916714077, 'alpha': 1.077441906719877, 'critic_loss': 136.71528498331705, 'actor_loss': 50.49557882163957, 'time_step': 0.01860119306553177, 'td_error': 29.457210347729855, 'init_value': -76.69544982910156, 'ave_value': -47.29555265509934} step=2736
2022-04-20 20:37.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.32 [info     ] CQL_20220420203632: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.0003832906310321295, 'time_algorithm_update': 0.018176462915208604, 'temp_loss': 1.888361146226961, 'temp': 0.8944534021511412, 'alpha_loss': 3.1163541046978787, 'alpha': 1.0541555027515568, 'critic_loss': 157.12393612331815, 'actor_loss': 54.92966958653857, 'time_step': 0.018660961535938998, 'td_error': 33.40829417082946, 'init_value': -82.8329849243164, 'ave_value': -51.06281678475104} step=3078
2022-04-20 20:37.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.38 [info     ] CQL_20220420203632: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.0003784874029326857, 'time_algorithm_update': 0.018061421070879664, 'temp_loss': 1.7136751022952341, 'temp': 0.8842382171349219, 'alpha_loss': 3.823817359076606, 'alpha': 1.0237791119960316, 'critic_loss': 177.32807418198612, 'actor_loss': 58.814920336182354, 'time_step': 0.018542696858010098, 'td_error': 37.15070399775243, 'init_value': -88.66056060791016, 'ave_value': -53.67215021351906} step=3420
2022-04-20 20:37.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.45 [info     ] CQL_20220420203632: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.0003850188171654417, 'time_algorithm_update': 0.018343233225638408, 'temp_loss': 1.5399783685890556, 'temp': 0.8743265977031306, 'alpha_loss': 4.411401814884609, 'alpha': 0.9885509645032604, 'critic_loss': 195.51926310578284, 'actor_loss': 62.210023768464026, 'time_step': 0.01883034957082648, 'td_error': 40.03809030461738, 'init_value': -92.46258544921875, 'ave_value': -56.549230115590866} step=3762
2022-04-20 20:37.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.52 [info     ] CQL_20220420203632: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.0003780147485565721, 'time_algorithm_update': 0.01809081208636189, 'temp_loss': 1.4135102528577659, 'temp': 0.8646814701152824, 'alpha_loss': 4.812062771696794, 'alpha': 0.9515454532110204, 'critic_loss': 213.7329492847822, 'actor_loss': 65.30854211773789, 'time_step': 0.018570285791542098, 'td_error': 41.891854181532615, 'init_value': -97.19548034667969, 'ave_value': -59.437173089802535} step=4104
2022-04-20 20:37.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:37.58 [info     ] CQL_20220420203632: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.0003826409055475603, 'time_algorithm_update': 0.018004718579744037, 'temp_loss': 1.3036037972796033, 'temp': 0.85507096550618, 'alpha_loss': 5.056020259160047, 'alpha': 0.9157375998315755, 'critic_loss': 231.75218718233165, 'actor_loss': 68.05192859270419, 'time_step': 0.01848964872416, 'td_error': 44.52467657408425, 'init_value': -99.99382781982422, 'ave_value': -61.9732583491672} step=4446
2022-04-20 20:37.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.05 [info     ] CQL_20220420203632: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.0003784874029326857, 'time_algorithm_update': 0.018093694720351904, 'temp_loss': 1.182825411620893, 'temp': 0.845564108320147, 'alpha_loss': 5.150409649687203, 'alpha': 0.8822912958978909, 'critic_loss': 247.5761599624366, 'actor_loss': 70.4781886875978, 'time_step': 0.018570152639645583, 'td_error': 45.27802460863729, 'init_value': -102.9162826538086, 'ave_value': -63.04023044444688} step=4788
2022-04-20 20:38.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.11 [info     ] CQL_20220420203632: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.0003796599761784425, 'time_algorithm_update': 0.018111335603814376, 'temp_loss': 1.1000193925628885, 'temp': 0.8362197753978752, 'alpha_loss': 5.2125391269985, 'alpha': 0.8504571309912274, 'critic_loss': 262.6487972415679, 'actor_loss': 72.84248707866111, 'time_step': 0.01859376235314977, 'td_error': 47.098828247917346, 'init_value': -107.167724609375, 'ave_value': -66.3440461597068} step=5130
2022-04-20 20:38.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.18 [info     ] CQL_20220420203632: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.0003779875604729903, 'time_algorithm_update': 0.0180385572868481, 'temp_loss': 0.9938692230055903, 'temp': 0.8270087534921211, 'alpha_loss': 5.311881991854885, 'alpha': 0.8203039075198927, 'critic_loss': 275.1651871441401, 'actor_loss': 74.74920705605669, 'time_step': 0.018518072819849202, 'td_error': 47.59505490342924, 'init_value': -110.6735610961914, 'ave_value': -68.53261888123244} step=5472
2022-04-20 20:38.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.25 [info     ] CQL_20220420203632: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003850927129823562, 'time_algorithm_update': 0.018174727060641462, 'temp_loss': 0.8925906892931252, 'temp': 0.8179817466359389, 'alpha_loss': 5.425952371798064, 'alpha': 0.7913046203858671, 'critic_loss': 287.10879953841714, 'actor_loss': 76.54717022633692, 'time_step': 0.018661753476014613, 'td_error': 49.50236705050957, 'init_value': -114.6285629272461, 'ave_value': -69.94351326320219} step=5814
2022-04-20 20:38.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.31 [info     ] CQL_20220420203632: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.000382506359390348, 'time_algorithm_update': 0.01802061384881449, 'temp_loss': 0.8512152383724848, 'temp': 0.8090474026942114, 'alpha_loss': 5.31325120849219, 'alpha': 0.7638413201995761, 'critic_loss': 298.9255089118467, 'actor_loss': 78.31150541249771, 'time_step': 0.018504020066289175, 'td_error': 48.46190689994329, 'init_value': -115.14595794677734, 'ave_value': -71.15967249173369} step=6156
2022-04-20 20:38.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.38 [info     ] CQL_20220420203632: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00037852225945009824, 'time_algorithm_update': 0.018206920540123656, 'temp_loss': 0.7716915539482183, 'temp': 0.8001834694405048, 'alpha_loss': 5.344809030231676, 'alpha': 0.7379645658166785, 'critic_loss': 308.95275093658626, 'actor_loss': 79.7545994764183, 'time_step': 0.018686792306732713, 'td_error': 47.31524806285537, 'init_value': -115.99674987792969, 'ave_value': -71.78096827685296} step=6498
2022-04-20 20:38.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.44 [info     ] CQL_20220420203632: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.0003814655437804105, 'time_algorithm_update': 0.018159415986802842, 'temp_loss': 0.7081235578795623, 'temp': 0.7912926952741299, 'alpha_loss': 5.358596175037629, 'alpha': 0.7131420307689242, 'critic_loss': 319.2792413722702, 'actor_loss': 81.1312214923881, 'time_step': 0.018640908581471584, 'td_error': 47.76030346079302, 'init_value': -117.1462631225586, 'ave_value': -72.92996453677178} step=6840
2022-04-20 20:38.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.51 [info     ] CQL_20220420203632: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.000380391265913757, 'time_algorithm_update': 0.018012332637407626, 'temp_loss': 0.6457508028599254, 'temp': 0.7827809583722499, 'alpha_loss': 5.398145242044103, 'alpha': 0.6890275861784728, 'critic_loss': 328.1889353969641, 'actor_loss': 82.3557959885625, 'time_step': 0.018492699366563944, 'td_error': 48.95968733180088, 'init_value': -121.99015808105469, 'ave_value': -74.3095580240923} step=7182
2022-04-20 20:38.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:38.58 [info     ] CQL_20220420203632: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00037800847438343785, 'time_algorithm_update': 0.018263772217153806, 'temp_loss': 0.5772173680931504, 'temp': 0.7742915629294881, 'alpha_loss': 5.378760696851719, 'alpha': 0.6658303192135884, 'critic_loss': 337.813923529017, 'actor_loss': 83.50677425540678, 'time_step': 0.01874508007228026, 'td_error': 48.29539033984476, 'init_value': -122.87663269042969, 'ave_value': -76.42306770049505} step=7524
2022-04-20 20:38.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.04 [info     ] CQL_20220420203632: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.0003857807806360791, 'time_algorithm_update': 0.018087348045661436, 'temp_loss': 0.5201781520957661, 'temp': 0.7663133414516672, 'alpha_loss': 5.2893488476848045, 'alpha': 0.6437862072771753, 'critic_loss': 346.89719323944627, 'actor_loss': 84.47499648869386, 'time_step': 0.018575961129707202, 'td_error': 49.28585677799792, 'init_value': -122.87004089355469, 'ave_value': -76.04496429798537} step=7866
2022-04-20 20:39.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.11 [info     ] CQL_20220420203632: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0003787815919396473, 'time_algorithm_update': 0.018102507842214483, 'temp_loss': 0.44096966635174395, 'temp': 0.7586692850492154, 'alpha_loss': 5.280253636906719, 'alpha': 0.6225717240258267, 'critic_loss': 355.90086587827807, 'actor_loss': 85.35990461829113, 'time_step': 0.018584123828954864, 'td_error': 49.96567759816364, 'init_value': -122.49964904785156, 'ave_value': -76.62417355839264} step=8208
2022-04-20 20:39.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.18 [info     ] CQL_20220420203632: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003788094771535773, 'time_algorithm_update': 0.018207415502670913, 'temp_loss': 0.40203129993588255, 'temp': 0.7512839276191087, 'alpha_loss': 5.194500834621184, 'alpha': 0.6021003852113646, 'critic_loss': 363.0078176754957, 'actor_loss': 86.18843393158494, 'time_step': 0.01868647092964217, 'td_error': 49.93513144335248, 'init_value': -125.27288818359375, 'ave_value': -77.53561830630174} step=8550
2022-04-20 20:39.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.24 [info     ] CQL_20220420203632: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.0003777282279834413, 'time_algorithm_update': 0.01807344656938698, 'temp_loss': 0.3605228297368825, 'temp': 0.7441585874348356, 'alpha_loss': 5.12414548829285, 'alpha': 0.5826559246283526, 'critic_loss': 371.00917927702966, 'actor_loss': 86.8070953547606, 'time_step': 0.018551470243442825, 'td_error': 48.36677382470479, 'init_value': -127.43266296386719, 'ave_value': -78.4115230287645} step=8892
2022-04-20 20:39.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.31 [info     ] CQL_20220420203632: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.000384376062984355, 'time_algorithm_update': 0.018038281920360544, 'temp_loss': 0.3433389776380874, 'temp': 0.7369328479320683, 'alpha_loss': 5.036054913063495, 'alpha': 0.5638073121595104, 'critic_loss': 378.2944473355834, 'actor_loss': 87.53385798136394, 'time_step': 0.018524904697262055, 'td_error': 47.781984839203076, 'init_value': -126.6159439086914, 'ave_value': -78.50193543159143} step=9234
2022-04-20 20:39.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.37 [info     ] CQL_20220420203632: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00037141571268003586, 'time_algorithm_update': 0.017968568188405178, 'temp_loss': 0.29562368717218873, 'temp': 0.7299587554053256, 'alpha_loss': 4.944807120931078, 'alpha': 0.5456961661751507, 'critic_loss': 384.9199230350249, 'actor_loss': 88.12156505473176, 'time_step': 0.018442264077259084, 'td_error': 49.08331879502853, 'init_value': -128.11013793945312, 'ave_value': -79.58099211890418} step=9576
2022-04-20 20:39.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.44 [info     ] CQL_20220420203632: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.0003763451213725129, 'time_algorithm_update': 0.018039130327994365, 'temp_loss': 0.2558073635534411, 'temp': 0.7233622628345824, 'alpha_loss': 4.862054696557117, 'alpha': 0.528348788183335, 'critic_loss': 390.38091416386834, 'actor_loss': 88.50537575615778, 'time_step': 0.018522171249166566, 'td_error': 47.82699897569235, 'init_value': -127.888427734375, 'ave_value': -80.63671747676425} step=9918
2022-04-20 20:39.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.51 [info     ] CQL_20220420203632: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.00038337777232566074, 'time_algorithm_update': 0.018227988516378125, 'temp_loss': 0.2585634341387198, 'temp': 0.7169046584974256, 'alpha_loss': 4.675438438242639, 'alpha': 0.5116185878801067, 'critic_loss': 395.84461421855013, 'actor_loss': 88.94036309760914, 'time_step': 0.018711486057928432, 'td_error': 49.070304235856284, 'init_value': -129.55612182617188, 'ave_value': -80.43391150389638} step=10260
2022-04-20 20:39.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:39.57 [info     ] CQL_20220420203632: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.0003788729160152681, 'time_algorithm_update': 0.018025783767477113, 'temp_loss': 0.23648719354505426, 'temp': 0.7100825766373796, 'alpha_loss': 4.594998195157413, 'alpha': 0.4956626114789505, 'critic_loss': 399.9917328594721, 'actor_loss': 89.34711822152835, 'time_step': 0.018507877985636394, 'td_error': 45.61305230011932, 'init_value': -125.16300201416016, 'ave_value': -79.17708339756405} step=10602
2022-04-20 20:39.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.04 [info     ] CQL_20220420203632: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.0003807893273426078, 'time_algorithm_update': 0.017865047120211416, 'temp_loss': 0.1939850030630304, 'temp': 0.7039969148691635, 'alpha_loss': 4.470460995596055, 'alpha': 0.48018409608051793, 'critic_loss': 403.1177609984638, 'actor_loss': 89.50679752282929, 'time_step': 0.018347353265996565, 'td_error': 47.577580696099915, 'init_value': -128.29576110839844, 'ave_value': -79.48660827048727} step=10944
2022-04-20 20:40.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.10 [info     ] CQL_20220420203632: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003814718179535448, 'time_algorithm_update': 0.017965426221925613, 'temp_loss': 0.1576771431177724, 'temp': 0.6984593918797566, 'alpha_loss': 4.3548114348573295, 'alpha': 0.4652902758435199, 'critic_loss': 405.72362121225103, 'actor_loss': 89.68622109485649, 'time_step': 0.018448755057931643, 'td_error': 44.942011529647125, 'init_value': -128.05831909179688, 'ave_value': -80.62235646231687} step=11286
2022-04-20 20:40.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.17 [info     ] CQL_20220420203632: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00037410524156358506, 'time_algorithm_update': 0.018216195162276776, 'temp_loss': 0.13972554276887958, 'temp': 0.6933935023190683, 'alpha_loss': 4.239853366773728, 'alpha': 0.4509525195374126, 'critic_loss': 408.9251821417558, 'actor_loss': 89.89135358486956, 'time_step': 0.01869239096055951, 'td_error': 49.08423597120378, 'init_value': -128.76126098632812, 'ave_value': -81.4017692865183} step=11628
2022-04-20 20:40.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.23 [info     ] CQL_20220420203632: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0003622059236493027, 'time_algorithm_update': 0.01707034571128979, 'temp_loss': 0.12771810573247963, 'temp': 0.6885898636098493, 'alpha_loss': 4.136067550781875, 'alpha': 0.4370200954682646, 'critic_loss': 410.75466205083836, 'actor_loss': 90.0105925889043, 'time_step': 0.01752817282202648, 'td_error': 47.75978371680468, 'init_value': -128.7679443359375, 'ave_value': -81.39869723330679} step=11970
2022-04-20 20:40.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.29 [info     ] CQL_20220420203632: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.0003591748008951109, 'time_algorithm_update': 0.01718170810164067, 'temp_loss': 0.11750981228545918, 'temp': 0.6840832702249114, 'alpha_loss': 3.9946348667144775, 'alpha': 0.4235750136145374, 'critic_loss': 411.51961432562933, 'actor_loss': 90.10934138158609, 'time_step': 0.017640242102550486, 'td_error': 47.914368954597684, 'init_value': -127.6486587524414, 'ave_value': -81.09360845315832} step=12312
2022-04-20 20:40.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.36 [info     ] CQL_20220420203632: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003773385321187694, 'time_algorithm_update': 0.018137005337497646, 'temp_loss': 0.09857425886091956, 'temp': 0.6798847593062105, 'alpha_loss': 3.8764137880146854, 'alpha': 0.41067829257563543, 'critic_loss': 412.7149918762564, 'actor_loss': 90.15916672645257, 'time_step': 0.018614827540882846, 'td_error': 48.278115565674966, 'init_value': -127.70140075683594, 'ave_value': -81.06543760963321} step=12654
2022-04-20 20:40.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.42 [info     ] CQL_20220420203632: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.00038525305296245376, 'time_algorithm_update': 0.01800218799657989, 'temp_loss': 0.10082839700605786, 'temp': 0.6757391011505797, 'alpha_loss': 3.6977609824716, 'alpha': 0.39824138991316854, 'critic_loss': 413.48868029856544, 'actor_loss': 90.24680796840735, 'time_step': 0.018486902030587893, 'td_error': 43.85653914144505, 'init_value': -124.2308578491211, 'ave_value': -80.4470784519982} step=12996
2022-04-20 20:40.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.49 [info     ] CQL_20220420203632: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00038024556567097266, 'time_algorithm_update': 0.01803749067741528, 'temp_loss': 0.08397865977290778, 'temp': 0.6721924496324438, 'alpha_loss': 3.535769715992331, 'alpha': 0.38631541312438006, 'critic_loss': 413.1303315636707, 'actor_loss': 90.09371859009502, 'time_step': 0.018516012102539777, 'td_error': 46.19742039818591, 'init_value': -127.3469009399414, 'ave_value': -82.07755068180126} step=13338
2022-04-20 20:40.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:40.56 [info     ] CQL_20220420203632: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00038188870190179833, 'time_algorithm_update': 0.01799634325574016, 'temp_loss': 0.07280903254031089, 'temp': 0.6683598692305603, 'alpha_loss': 3.408925599870626, 'alpha': 0.3749016159982012, 'critic_loss': 411.2787293551261, 'actor_loss': 90.01926966438516, 'time_step': 0.018481299193979006, 'td_error': 43.2461311942333, 'init_value': -123.51933288574219, 'ave_value': -80.42397898625717} step=13680
2022-04-20 20:40.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.02 [info     ] CQL_20220420203632: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.00037939297525506273, 'time_algorithm_update': 0.01795375416850486, 'temp_loss': 0.05252674493219769, 'temp': 0.6655234004321852, 'alpha_loss': 3.3187767967843174, 'alpha': 0.3638003286388185, 'critic_loss': 410.039429424799, 'actor_loss': 90.00143501772519, 'time_step': 0.018434585883603458, 'td_error': 43.66797359540613, 'init_value': -124.8774642944336, 'ave_value': -80.83396253395456} step=14022
2022-04-20 20:41.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.09 [info     ] CQL_20220420203632: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00037368138631184894, 'time_algorithm_update': 0.01790850064907855, 'temp_loss': 0.06135742408250682, 'temp': 0.6624722466831319, 'alpha_loss': 3.172977560793447, 'alpha': 0.35297847151407724, 'critic_loss': 407.27200121070905, 'actor_loss': 89.93229371901842, 'time_step': 0.018384862364384167, 'td_error': 43.96478265688287, 'init_value': -121.56541442871094, 'ave_value': -80.35876398573353} step=14364
2022-04-20 20:41.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.15 [info     ] CQL_20220420203632: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.00037958120044909025, 'time_algorithm_update': 0.017943249111287078, 'temp_loss': 0.047467860077338955, 'temp': 0.6600809879818855, 'alpha_loss': 3.075010413314864, 'alpha': 0.34254477029306846, 'critic_loss': 405.52011242247465, 'actor_loss': 89.72283928854424, 'time_step': 0.01842410243742647, 'td_error': 44.92489612560252, 'init_value': -123.05140686035156, 'ave_value': -81.32976812589544} step=14706
2022-04-20 20:41.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.22 [info     ] CQL_20220420203632: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.00037749468931677746, 'time_algorithm_update': 0.018098160537362795, 'temp_loss': 0.05444481278829581, 'temp': 0.6579402659023017, 'alpha_loss': 2.9487018654918113, 'alpha': 0.3323342273806968, 'critic_loss': 402.7301633065207, 'actor_loss': 89.59811742682206, 'time_step': 0.018577009613750972, 'td_error': 43.84975344102103, 'init_value': -121.6536865234375, 'ave_value': -80.66142940907562} step=15048
2022-04-20 20:41.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.28 [info     ] CQL_20220420203632: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00037119890514173006, 'time_algorithm_update': 0.017883990940294768, 'temp_loss': 0.03738582165290912, 'temp': 0.6553650569846059, 'alpha_loss': 2.8399090453198084, 'alpha': 0.32255557205593377, 'critic_loss': 401.00089937622784, 'actor_loss': 89.44122992621527, 'time_step': 0.018356125954298946, 'td_error': 43.43728280377089, 'init_value': -118.2171859741211, 'ave_value': -79.98523853163529} step=15390
2022-04-20 20:41.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.35 [info     ] CQL_20220420203632: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.00037633745293868214, 'time_algorithm_update': 0.017901404559263707, 'temp_loss': 0.03280949972199592, 'temp': 0.6534743697671165, 'alpha_loss': 2.7366446904968798, 'alpha': 0.31307927863291135, 'critic_loss': 397.74877653066176, 'actor_loss': 89.25608917147096, 'time_step': 0.018375660243787263, 'td_error': 43.46423231367271, 'init_value': -118.6242904663086, 'ave_value': -80.25326140091796} step=15732
2022-04-20 20:41.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.42 [info     ] CQL_20220420203632: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.0003812877755416067, 'time_algorithm_update': 0.01825804110856084, 'temp_loss': 0.051964487265032985, 'temp': 0.6515494736663082, 'alpha_loss': 2.6620735052036264, 'alpha': 0.30376381501119737, 'critic_loss': 394.9845056366502, 'actor_loss': 89.14527366593568, 'time_step': 0.018740048882556936, 'td_error': 40.55889451451674, 'init_value': -116.19227600097656, 'ave_value': -79.13432486004623} step=16074
2022-04-20 20:41.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.48 [info     ] CQL_20220420203632: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.00035325267858672563, 'time_algorithm_update': 0.01687957180870904, 'temp_loss': 0.0585066748085862, 'temp': 0.6485573177100622, 'alpha_loss': 2.5685497079676356, 'alpha': 0.29478026281671915, 'critic_loss': 394.68319318447897, 'actor_loss': 89.03299119737413, 'time_step': 0.01732610749919512, 'td_error': 43.56213618078536, 'init_value': -118.7022933959961, 'ave_value': -79.84187132888852} step=16416
2022-04-20 20:41.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:41.54 [info     ] CQL_20220420203632: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00035663585216678376, 'time_algorithm_update': 0.017204741288346852, 'temp_loss': 0.05265435750176858, 'temp': 0.6464297808053201, 'alpha_loss': 2.4517554727911253, 'alpha': 0.28603223733037536, 'critic_loss': 393.9214287473444, 'actor_loss': 88.81920958401864, 'time_step': 0.01765732807025575, 'td_error': 43.162294507778455, 'init_value': -116.71888732910156, 'ave_value': -78.8653215247339} step=16758
2022-04-20 20:41.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:42.01 [info     ] CQL_20220420203632: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00037304351204320004, 'time_algorithm_update': 0.01833211608797486, 'temp_loss': 0.06001566866772216, 'temp': 0.6436613562859987, 'alpha_loss': 2.3625583073549103, 'alpha': 0.2776247002402244, 'critic_loss': 393.1939351042809, 'actor_loss': 88.63320420917712, 'time_step': 0.01880972148382176, 'td_error': 43.42670536326623, 'init_value': -116.7887954711914, 'ave_value': -79.76198019913188} step=17100
2022-04-20 20:42.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420203632/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:42.01 [info     ] FQE_20220420204201: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014881628105439335, 'time_algorithm_update': 0.0020906077810080655, 'loss': 0.007687475574371147, 'time_step': 0.0023083758641438312, 'init_value': 0.16347463428974152, 'ave_value': 0.19632000806783367, 'soft_opc': nan} step=166




2022-04-20 20:42.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.02 [info     ] FQE_20220420204201: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001473900783492858, 'time_algorithm_update': 0.002149298966649067, 'loss': 0.00614944594794413, 'time_step': 0.0023601744548383966, 'init_value': 0.025763938203454018, 'ave_value': 0.11817596180632979, 'soft_opc': nan} step=332




2022-04-20 20:42.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.02 [info     ] FQE_20220420204201: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001523336732243917, 'time_algorithm_update': 0.0020819485905658767, 'loss': 0.005641789723984359, 'time_step': 0.0023037238293383494, 'init_value': -0.034033484756946564, 'ave_value': 0.08754750007495016, 'soft_opc': nan} step=498




2022-04-20 20:42.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.03 [info     ] FQE_20220420204201: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014920694282255978, 'time_algorithm_update': 0.0020680154662534415, 'loss': 0.005548002672415361, 'time_step': 0.0022848916341023273, 'init_value': -0.12378165125846863, 'ave_value': 0.047088463504605736, 'soft_opc': nan} step=664




2022-04-20 20:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.03 [info     ] FQE_20220420204201: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014939796493714116, 'time_algorithm_update': 0.0021152539425585643, 'loss': 0.005077768546383244, 'time_step': 0.002331061535570995, 'init_value': -0.19900302588939667, 'ave_value': 0.0253300422617981, 'soft_opc': nan} step=830




2022-04-20 20:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.03 [info     ] FQE_20220420204201: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001481053340865905, 'time_algorithm_update': 0.002107628856796816, 'loss': 0.004667667935725795, 'time_step': 0.0023219729044351234, 'init_value': -0.23437240719795227, 'ave_value': 0.002161451210981017, 'soft_opc': nan} step=996




2022-04-20 20:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.04 [info     ] FQE_20220420204201: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015018072472997458, 'time_algorithm_update': 0.002074458512915186, 'loss': 0.004627367502628231, 'time_step': 0.0022943034229508364, 'init_value': -0.2979277968406677, 'ave_value': -0.007422186753100103, 'soft_opc': nan} step=1162




2022-04-20 20:42.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.04 [info     ] FQE_20220420204201: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001512392457709255, 'time_algorithm_update': 0.0020821223776024507, 'loss': 0.004263495362969677, 'time_step': 0.002304281096860587, 'init_value': -0.36977168917655945, 'ave_value': -0.045604376504952845, 'soft_opc': nan} step=1328




2022-04-20 20:42.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.05 [info     ] FQE_20220420204201: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015158538358757295, 'time_algorithm_update': 0.0021044044609529427, 'loss': 0.003915486876931356, 'time_step': 0.0023289157683590808, 'init_value': -0.38466697931289673, 'ave_value': -0.038420459391498886, 'soft_opc': nan} step=1494




2022-04-20 20:42.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.05 [info     ] FQE_20220420204201: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001504765935691006, 'time_algorithm_update': 0.002157909324370235, 'loss': 0.004051336900089833, 'time_step': 0.0023754705865699126, 'init_value': -0.46339550614356995, 'ave_value': -0.0911874718796294, 'soft_opc': nan} step=1660




2022-04-20 20:42.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.06 [info     ] FQE_20220420204201: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001523939959974174, 'time_algorithm_update': 0.00216360264513866, 'loss': 0.003910695423557232, 'time_step': 0.0023884026401014215, 'init_value': -0.5038225650787354, 'ave_value': -0.09999837032645136, 'soft_opc': nan} step=1826




2022-04-20 20:42.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.06 [info     ] FQE_20220420204201: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015229345804237458, 'time_algorithm_update': 0.002115669020687241, 'loss': 0.003956513832805745, 'time_step': 0.0023402018719408885, 'init_value': -0.5561982989311218, 'ave_value': -0.13807822262012476, 'soft_opc': nan} step=1992




2022-04-20 20:42.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.06 [info     ] FQE_20220420204201: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015072219343070523, 'time_algorithm_update': 0.0021155095962156734, 'loss': 0.004153115026854786, 'time_step': 0.002333636743476592, 'init_value': -0.6095092296600342, 'ave_value': -0.168302450662999, 'soft_opc': nan} step=2158




2022-04-20 20:42.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.07 [info     ] FQE_20220420204201: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015065468937517648, 'time_algorithm_update': 0.0021249400563986905, 'loss': 0.004097792682524052, 'time_step': 0.002343640270003353, 'init_value': -0.6533358097076416, 'ave_value': -0.18888352010063492, 'soft_opc': nan} step=2324




2022-04-20 20:42.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.07 [info     ] FQE_20220420204201: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015047372105609938, 'time_algorithm_update': 0.002104635698249541, 'loss': 0.0041663064684505655, 'time_step': 0.0023291125354996645, 'init_value': -0.6889719367027283, 'ave_value': -0.208169168906773, 'soft_opc': nan} step=2490




2022-04-20 20:42.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.08 [info     ] FQE_20220420204201: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015751855919160038, 'time_algorithm_update': 0.0022186198866510965, 'loss': 0.004476686311234911, 'time_step': 0.002446744815412774, 'init_value': -0.7493218183517456, 'ave_value': -0.2599941505530329, 'soft_opc': nan} step=2656




2022-04-20 20:42.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.08 [info     ] FQE_20220420204201: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001480450113135648, 'time_algorithm_update': 0.0020335208938782475, 'loss': 0.004807561964879419, 'time_step': 0.0022496918597853326, 'init_value': -0.8601689338684082, 'ave_value': -0.3444435176380851, 'soft_opc': nan} step=2822




2022-04-20 20:42.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.09 [info     ] FQE_20220420204201: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014815847557711313, 'time_algorithm_update': 0.002086207091090191, 'loss': 0.0051990971565302685, 'time_step': 0.002306337816169463, 'init_value': -0.8461145162582397, 'ave_value': -0.32509786744039876, 'soft_opc': nan} step=2988




2022-04-20 20:42.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.09 [info     ] FQE_20220420204201: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001475839729768684, 'time_algorithm_update': 0.0020627774387957103, 'loss': 0.005798121215775609, 'time_step': 0.002281206199921757, 'init_value': -0.9235219955444336, 'ave_value': -0.38489088322547776, 'soft_opc': nan} step=3154




2022-04-20 20:42.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.10 [info     ] FQE_20220420204201: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015508266816656273, 'time_algorithm_update': 0.0020987657179315404, 'loss': 0.0061460552443974616, 'time_step': 0.0023208282080041357, 'init_value': -0.9250497817993164, 'ave_value': -0.3943192852585501, 'soft_opc': nan} step=3320




2022-04-20 20:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.10 [info     ] FQE_20220420204201: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015117317797189736, 'time_algorithm_update': 0.0020828993923692816, 'loss': 0.006689969068028437, 'time_step': 0.002302444124796304, 'init_value': -0.9738432168960571, 'ave_value': -0.4143686923659868, 'soft_opc': nan} step=3486




2022-04-20 20:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.10 [info     ] FQE_20220420204201: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001494166362716491, 'time_algorithm_update': 0.0020915514015289673, 'loss': 0.007135840864285423, 'time_step': 0.0023068232708666698, 'init_value': -0.9882068037986755, 'ave_value': -0.41283911169917736, 'soft_opc': nan} step=3652




2022-04-20 20:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.11 [info     ] FQE_20220420204201: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015512144709207924, 'time_algorithm_update': 0.0021663573851068334, 'loss': 0.007802103343139212, 'time_step': 0.002389857567936541, 'init_value': -1.0183334350585938, 'ave_value': -0.4541337062193601, 'soft_opc': nan} step=3818




2022-04-20 20:42.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.11 [info     ] FQE_20220420204201: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001501333282654544, 'time_algorithm_update': 0.0020987326840320266, 'loss': 0.008736828454287655, 'time_step': 0.0023148763610656, 'init_value': -1.0961251258850098, 'ave_value': -0.49942903132891064, 'soft_opc': nan} step=3984




2022-04-20 20:42.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.12 [info     ] FQE_20220420204201: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015327585748879305, 'time_algorithm_update': 0.002101409866149167, 'loss': 0.009196236318661776, 'time_step': 0.002325356724750565, 'init_value': -1.1031224727630615, 'ave_value': -0.4877289762527489, 'soft_opc': nan} step=4150




2022-04-20 20:42.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.12 [info     ] FQE_20220420204201: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014894698039594903, 'time_algorithm_update': 0.002115814082593803, 'loss': 0.009963451123779572, 'time_step': 0.002336951623480004, 'init_value': -1.1257152557373047, 'ave_value': -0.5116607131436467, 'soft_opc': nan} step=4316




2022-04-20 20:42.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.13 [info     ] FQE_20220420204201: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015099364590932088, 'time_algorithm_update': 0.0021342197096491434, 'loss': 0.01059514709751133, 'time_step': 0.0023543576160109186, 'init_value': -1.1585865020751953, 'ave_value': -0.5310856034903711, 'soft_opc': nan} step=4482




2022-04-20 20:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.13 [info     ] FQE_20220420204201: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014847301575074713, 'time_algorithm_update': 0.0021226147571242, 'loss': 0.011676291822237018, 'time_step': 0.0023374112255601996, 'init_value': -1.2135000228881836, 'ave_value': -0.57654161270926, 'soft_opc': nan} step=4648




2022-04-20 20:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.13 [info     ] FQE_20220420204201: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014837965907820737, 'time_algorithm_update': 0.002071103417729757, 'loss': 0.01211453943530165, 'time_step': 0.0022900923188910426, 'init_value': -1.2267404794692993, 'ave_value': -0.571895267245536, 'soft_opc': nan} step=4814




2022-04-20 20:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.14 [info     ] FQE_20220420204201: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015283779925610646, 'time_algorithm_update': 0.0020984712853489153, 'loss': 0.013117485397764627, 'time_step': 0.0023209703973976962, 'init_value': -1.2720129489898682, 'ave_value': -0.6332756834289244, 'soft_opc': nan} step=4980




2022-04-20 20:42.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.14 [info     ] FQE_20220420204201: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015439470130276968, 'time_algorithm_update': 0.0020774215100759484, 'loss': 0.01468318328401635, 'time_step': 0.0023005525749849984, 'init_value': -1.4192776679992676, 'ave_value': -0.7465269053354859, 'soft_opc': nan} step=5146




2022-04-20 20:42.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.15 [info     ] FQE_20220420204201: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015136132757347752, 'time_algorithm_update': 0.002097743103303105, 'loss': 0.015289369029419339, 'time_step': 0.0023211585469992764, 'init_value': -1.3884245157241821, 'ave_value': -0.6922337960468622, 'soft_opc': nan} step=5312




2022-04-20 20:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.15 [info     ] FQE_20220420204201: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015003279031041157, 'time_algorithm_update': 0.002126666436712426, 'loss': 0.01637677554495588, 'time_step': 0.00234670811388866, 'init_value': -1.4576829671859741, 'ave_value': -0.7681954685917443, 'soft_opc': nan} step=5478




2022-04-20 20:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.16 [info     ] FQE_20220420204201: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001551286283745823, 'time_algorithm_update': 0.002165530101362481, 'loss': 0.016990236334884203, 'time_step': 0.002390737993171416, 'init_value': -1.4751323461532593, 'ave_value': -0.8007861372385476, 'soft_opc': nan} step=5644




2022-04-20 20:42.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.16 [info     ] FQE_20220420204201: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001563293388090938, 'time_algorithm_update': 0.0021680449864950524, 'loss': 0.017810038917877496, 'time_step': 0.002395611211477992, 'init_value': -1.5373058319091797, 'ave_value': -0.8636763408343803, 'soft_opc': nan} step=5810




2022-04-20 20:42.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.16 [info     ] FQE_20220420204201: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015416346400617118, 'time_algorithm_update': 0.0021337988864944643, 'loss': 0.018823890093326884, 'time_step': 0.0023603898933134883, 'init_value': -1.5454038381576538, 'ave_value': -0.863653581435079, 'soft_opc': nan} step=5976




2022-04-20 20:42.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.17 [info     ] FQE_20220420204201: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015143314039850808, 'time_algorithm_update': 0.0020563789160854846, 'loss': 0.02012614565494706, 'time_step': 0.002277108560125512, 'init_value': -1.6070435047149658, 'ave_value': -0.9465396962086629, 'soft_opc': nan} step=6142




2022-04-20 20:42.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.17 [info     ] FQE_20220420204201: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015729881194700678, 'time_algorithm_update': 0.0021359403449368766, 'loss': 0.021402994318758643, 'time_step': 0.00236382111009345, 'init_value': -1.5899896621704102, 'ave_value': -0.9041429948086875, 'soft_opc': nan} step=6308




2022-04-20 20:42.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.18 [info     ] FQE_20220420204201: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001521527049053146, 'time_algorithm_update': 0.0021138737000614763, 'loss': 0.0222248479763203, 'time_step': 0.0023316805621227585, 'init_value': -1.6398085355758667, 'ave_value': -0.9352300415839161, 'soft_opc': nan} step=6474




2022-04-20 20:42.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.18 [info     ] FQE_20220420204201: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015325287738478328, 'time_algorithm_update': 0.0020719594266041218, 'loss': 0.0235380008592304, 'time_step': 0.00229212749435241, 'init_value': -1.6349679231643677, 'ave_value': -0.9112935301537315, 'soft_opc': nan} step=6640




2022-04-20 20:42.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.19 [info     ] FQE_20220420204201: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015517602483910252, 'time_algorithm_update': 0.0021087275930197842, 'loss': 0.02415289077557152, 'time_step': 0.0023345444575849786, 'init_value': -1.7191100120544434, 'ave_value': -1.0030370154199904, 'soft_opc': nan} step=6806




2022-04-20 20:42.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.19 [info     ] FQE_20220420204201: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001523939959974174, 'time_algorithm_update': 0.0021780327142003073, 'loss': 0.02627253008190071, 'time_step': 0.0023993943111006036, 'init_value': -1.739930272102356, 'ave_value': -1.0495301581080163, 'soft_opc': nan} step=6972




2022-04-20 20:42.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.20 [info     ] FQE_20220420204201: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001551516084785921, 'time_algorithm_update': 0.0021478957440479694, 'loss': 0.02700780979315582, 'time_step': 0.0023751890802957924, 'init_value': -1.6927697658538818, 'ave_value': -0.9867945462213578, 'soft_opc': nan} step=7138




2022-04-20 20:42.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.20 [info     ] FQE_20220420204201: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015073799225221197, 'time_algorithm_update': 0.0020832067512604126, 'loss': 0.02873920647256323, 'time_step': 0.0023040196981774755, 'init_value': -1.851493000984192, 'ave_value': -1.1166967203890956, 'soft_opc': nan} step=7304




2022-04-20 20:42.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.20 [info     ] FQE_20220420204201: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001500141189759036, 'time_algorithm_update': 0.002099193722368723, 'loss': 0.029871378190736902, 'time_step': 0.00232273986540645, 'init_value': -1.7930490970611572, 'ave_value': -1.032356134283583, 'soft_opc': nan} step=7470




2022-04-20 20:42.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.21 [info     ] FQE_20220420204201: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015491318989949054, 'time_algorithm_update': 0.002165024539074266, 'loss': 0.03176936194306262, 'time_step': 0.0023936622114066617, 'init_value': -1.82808518409729, 'ave_value': -1.0548646761658225, 'soft_opc': nan} step=7636




2022-04-20 20:42.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.21 [info     ] FQE_20220420204201: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015413617513265954, 'time_algorithm_update': 0.0021095620580466397, 'loss': 0.03256713073554797, 'time_step': 0.00233349168157003, 'init_value': -1.98430335521698, 'ave_value': -1.229845508555504, 'soft_opc': nan} step=7802




2022-04-20 20:42.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.22 [info     ] FQE_20220420204201: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015483275953545627, 'time_algorithm_update': 0.0021261149142161907, 'loss': 0.035378230804386714, 'time_step': 0.0023518527846738517, 'init_value': -1.971974492073059, 'ave_value': -1.2165186383702733, 'soft_opc': nan} step=7968




2022-04-20 20:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.22 [info     ] FQE_20220420204201: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015322846102427287, 'time_algorithm_update': 0.0021275612245123072, 'loss': 0.036543593232811665, 'time_step': 0.002352612564362675, 'init_value': -1.9824951887130737, 'ave_value': -1.1936056672494757, 'soft_opc': nan} step=8134




2022-04-20 20:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:42.23 [info     ] FQE_20220420204201: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015807439045733716, 'time_algorithm_update': 0.0021443639893129647, 'loss': 0.038092728709824476, 'time_step': 0.0023728063307612776, 'init_value': -1.8939176797866821, 'ave_value': -1.1227988684214316, 'soft_opc': nan} step=8300




2022-04-20 20:42.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204201/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:42.23 [info     ] Directory is created at d3rlpy_logs/FQE_20220420204223
2022-04-20 20:42.23 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:42.23 [debug    ] Building models...
2022-04-20 20:42.23 [debug    ] Models have been built.
2022-04-20 20:42.23 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420204223/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:42.24 [info     ] FQE_20220420204223: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001564642717671949, 'time_algorithm_update': 0.0020939686963724535, 'loss': 0.025408314018466965, 'time_step': 0.002319954855497493, 'init_value': -1.1049385070800781, 'ave_value': -1.1409545322125023, 'soft_opc': nan} step=344




2022-04-20 20:42.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.25 [info     ] FQE_20220420204223: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001551668311274329, 'time_algorithm_update': 0.0020945557328157648, 'loss': 0.022460172063844323, 'time_step': 0.0023204067418741625, 'init_value': -1.4907748699188232, 'ave_value': -1.5577135792253791, 'soft_opc': nan} step=688




2022-04-20 20:42.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.26 [info     ] FQE_20220420204223: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015815953875696935, 'time_algorithm_update': 0.002117402331773625, 'loss': 0.025584621395547548, 'time_step': 0.002348380726437236, 'init_value': -1.8956233263015747, 'ave_value': -1.9785392332184422, 'soft_opc': nan} step=1032




2022-04-20 20:42.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.26 [info     ] FQE_20220420204223: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015824409418327865, 'time_algorithm_update': 0.0021145960619283278, 'loss': 0.028388706203766687, 'time_step': 0.00234640545623247, 'init_value': -2.116544723510742, 'ave_value': -2.1870494529890183, 'soft_opc': nan} step=1376




2022-04-20 20:42.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.27 [info     ] FQE_20220420204223: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015323661094488098, 'time_algorithm_update': 0.002044661100520644, 'loss': 0.04076066324730988, 'time_step': 0.00227019468019175, 'init_value': -2.42887806892395, 'ave_value': -2.4841518529107676, 'soft_opc': nan} step=1720




2022-04-20 20:42.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.28 [info     ] FQE_20220420204223: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015590911687806596, 'time_algorithm_update': 0.0020806532959605373, 'loss': 0.05215157751741191, 'time_step': 0.002312791208888209, 'init_value': -2.5653738975524902, 'ave_value': -2.5972047327392573, 'soft_opc': nan} step=2064




2022-04-20 20:42.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.29 [info     ] FQE_20220420204223: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001541112744530966, 'time_algorithm_update': 0.002116325289704079, 'loss': 0.06735640509747141, 'time_step': 0.0023418463939844174, 'init_value': -2.888324737548828, 'ave_value': -2.9047915010190746, 'soft_opc': nan} step=2408




2022-04-20 20:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.30 [info     ] FQE_20220420204223: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015672417574150619, 'time_algorithm_update': 0.0021162740019864813, 'loss': 0.0858235280543869, 'time_step': 0.0023416918377543606, 'init_value': -2.949190616607666, 'ave_value': -2.935109585127164, 'soft_opc': nan} step=2752




2022-04-20 20:42.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.31 [info     ] FQE_20220420204223: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015422701835632324, 'time_algorithm_update': 0.0020976122035536657, 'loss': 0.10627401313950242, 'time_step': 0.002326353344806405, 'init_value': -3.1155142784118652, 'ave_value': -3.0899585652941215, 'soft_opc': nan} step=3096




2022-04-20 20:42.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.32 [info     ] FQE_20220420204223: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001559728799864303, 'time_algorithm_update': 0.0020991543004679125, 'loss': 0.12897400096665287, 'time_step': 0.002328128315681635, 'init_value': -3.1943745613098145, 'ave_value': -3.23759577826236, 'soft_opc': nan} step=3440




2022-04-20 20:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.33 [info     ] FQE_20220420204223: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015527009963989258, 'time_algorithm_update': 0.0021115368188813675, 'loss': 0.15692588797402243, 'time_step': 0.0023394178512484527, 'init_value': -3.121835470199585, 'ave_value': -3.2183709640225797, 'soft_opc': nan} step=3784




2022-04-20 20:42.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.33 [info     ] FQE_20220420204223: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015430741531904355, 'time_algorithm_update': 0.0020643126132876373, 'loss': 0.1806055817612295, 'time_step': 0.0022925339465917544, 'init_value': -2.8904531002044678, 'ave_value': -3.1957976430316815, 'soft_opc': nan} step=4128




2022-04-20 20:42.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.34 [info     ] FQE_20220420204223: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001542096914247025, 'time_algorithm_update': 0.002159126276193663, 'loss': 0.2043956911386272, 'time_step': 0.0023841254932935848, 'init_value': -2.6477599143981934, 'ave_value': -3.059937927918928, 'soft_opc': nan} step=4472




2022-04-20 20:42.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.35 [info     ] FQE_20220420204223: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015441345614056255, 'time_algorithm_update': 0.002085077901219213, 'loss': 0.23186323062386796, 'time_step': 0.002306660247403522, 'init_value': -2.4689903259277344, 'ave_value': -3.0777308264881507, 'soft_opc': nan} step=4816




2022-04-20 20:42.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.36 [info     ] FQE_20220420204223: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001548979171486788, 'time_algorithm_update': 0.0021090368891871253, 'loss': 0.2560543365679075, 'time_step': 0.0023353612700174023, 'init_value': -2.173102617263794, 'ave_value': -2.936214399787488, 'soft_opc': nan} step=5160




2022-04-20 20:42.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.37 [info     ] FQE_20220420204223: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015457979468412177, 'time_algorithm_update': 0.0021180579828661544, 'loss': 0.2653579809865373, 'time_step': 0.0023453949495803477, 'init_value': -1.8046008348464966, 'ave_value': -2.72861118795002, 'soft_opc': nan} step=5504




2022-04-20 20:42.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.38 [info     ] FQE_20220420204223: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015554178592770598, 'time_algorithm_update': 0.002136438391929449, 'loss': 0.2830552764800052, 'time_step': 0.0023614175097886906, 'init_value': -1.7457709312438965, 'ave_value': -2.82825937647255, 'soft_opc': nan} step=5848




2022-04-20 20:42.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.39 [info     ] FQE_20220420204223: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015648021254428598, 'time_algorithm_update': 0.0020982976569685827, 'loss': 0.3019806605839539, 'time_step': 0.0023247717424880626, 'init_value': -1.4387328624725342, 'ave_value': -2.621686232992792, 'soft_opc': nan} step=6192




2022-04-20 20:42.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.40 [info     ] FQE_20220420204223: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001565585302752118, 'time_algorithm_update': 0.002086696929709856, 'loss': 0.31287721741056546, 'time_step': 0.002315166384674782, 'init_value': -1.181812047958374, 'ave_value': -2.4223851678104285, 'soft_opc': nan} step=6536




2022-04-20 20:42.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.40 [info     ] FQE_20220420204223: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016012441280276277, 'time_algorithm_update': 0.00212468380151793, 'loss': 0.32594784284768585, 'time_step': 0.002359511547310408, 'init_value': -1.1404473781585693, 'ave_value': -2.3741766072474086, 'soft_opc': nan} step=6880




2022-04-20 20:42.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.41 [info     ] FQE_20220420204223: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001539456289868022, 'time_algorithm_update': 0.002148174962332082, 'loss': 0.3472575215287073, 'time_step': 0.0023766686750012773, 'init_value': -1.301030158996582, 'ave_value': -2.640034863131272, 'soft_opc': nan} step=7224




2022-04-20 20:42.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.42 [info     ] FQE_20220420204223: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015508158262385877, 'time_algorithm_update': 0.0020887096260869225, 'loss': 0.36150122374339505, 'time_step': 0.002314864202987316, 'init_value': -1.0489997863769531, 'ave_value': -2.415362663070361, 'soft_opc': nan} step=7568




2022-04-20 20:42.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.43 [info     ] FQE_20220420204223: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015834597654120866, 'time_algorithm_update': 0.0021243441936581635, 'loss': 0.379466766318263, 'time_step': 0.0023566027020299157, 'init_value': -0.5695892572402954, 'ave_value': -1.7630924596677762, 'soft_opc': nan} step=7912




2022-04-20 20:42.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.44 [info     ] FQE_20220420204223: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015969817028489223, 'time_algorithm_update': 0.002072857562885728, 'loss': 0.3938326716888696, 'time_step': 0.0023058257823766666, 'init_value': -0.49701690673828125, 'ave_value': -1.5593859933518075, 'soft_opc': nan} step=8256




2022-04-20 20:42.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.45 [info     ] FQE_20220420204223: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015662298646084097, 'time_algorithm_update': 0.002123656661011452, 'loss': 0.40397665574930086, 'time_step': 0.0023520789867223697, 'init_value': -0.25164276361465454, 'ave_value': -1.346993050356773, 'soft_opc': nan} step=8600




2022-04-20 20:42.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.46 [info     ] FQE_20220420204223: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015599783076796422, 'time_algorithm_update': 0.00208447769630787, 'loss': 0.4187559853145463, 'time_step': 0.002313509236934573, 'init_value': -0.22726064920425415, 'ave_value': -1.2017262117073007, 'soft_opc': nan} step=8944




2022-04-20 20:42.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.47 [info     ] FQE_20220420204223: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015537614046141158, 'time_algorithm_update': 0.0021257608435874763, 'loss': 0.43274255378475024, 'time_step': 0.002352251562961312, 'init_value': -0.14674288034439087, 'ave_value': -0.8940378316854303, 'soft_opc': nan} step=9288




2022-04-20 20:42.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.48 [info     ] FQE_20220420204223: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001590695492056913, 'time_algorithm_update': 0.002150994400645411, 'loss': 0.4508308764915307, 'time_step': 0.002385801354119944, 'init_value': 0.18509522080421448, 'ave_value': -0.35189308934920543, 'soft_opc': nan} step=9632




2022-04-20 20:42.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.48 [info     ] FQE_20220420204223: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015993104424587515, 'time_algorithm_update': 0.0021961442259855047, 'loss': 0.4645145996582023, 'time_step': 0.002428664024486098, 'init_value': -0.027703970670700073, 'ave_value': -0.252319702210727, 'soft_opc': nan} step=9976




2022-04-20 20:42.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.49 [info     ] FQE_20220420204223: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016161383584488269, 'time_algorithm_update': 0.0021616088789562847, 'loss': 0.4713821361155444, 'time_step': 0.0023944856122482656, 'init_value': -0.021726906299591064, 'ave_value': -0.08190962538995722, 'soft_opc': nan} step=10320




2022-04-20 20:42.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.50 [info     ] FQE_20220420204223: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015500395797019782, 'time_algorithm_update': 0.002163674942282743, 'loss': 0.4766719155555028, 'time_step': 0.002389005450315254, 'init_value': -0.12475645542144775, 'ave_value': -0.01988402574012677, 'soft_opc': nan} step=10664




2022-04-20 20:42.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.51 [info     ] FQE_20220420204223: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015569426292596863, 'time_algorithm_update': 0.0021006596642871235, 'loss': 0.47822479707687054, 'time_step': 0.002328015344087468, 'init_value': -0.29544875025749207, 'ave_value': -0.04528379167706983, 'soft_opc': nan} step=11008




2022-04-20 20:42.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.52 [info     ] FQE_20220420204223: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015597080075463584, 'time_algorithm_update': 0.002112824556439422, 'loss': 0.4892410055171092, 'time_step': 0.0023417313431584558, 'init_value': -0.4418659210205078, 'ave_value': -0.08926811697515281, 'soft_opc': nan} step=11352




2022-04-20 20:42.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.53 [info     ] FQE_20220420204223: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015667773956476257, 'time_algorithm_update': 0.002093105122100475, 'loss': 0.4975549965977755, 'time_step': 0.0023230674654938456, 'init_value': -0.25385212898254395, 'ave_value': 0.34752519202359894, 'soft_opc': nan} step=11696




2022-04-20 20:42.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.54 [info     ] FQE_20220420204223: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001549921756566957, 'time_algorithm_update': 0.0021029329577157666, 'loss': 0.5088300692827202, 'time_step': 0.002331882022147955, 'init_value': -0.776366651058197, 'ave_value': -0.09572574419749749, 'soft_opc': nan} step=12040




2022-04-20 20:42.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.55 [info     ] FQE_20220420204223: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001563519932502924, 'time_algorithm_update': 0.002111122358676999, 'loss': 0.5134503212324235, 'time_step': 0.0023399085499519524, 'init_value': -0.6522790193557739, 'ave_value': 0.06330096031967047, 'soft_opc': nan} step=12384




2022-04-20 20:42.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.55 [info     ] FQE_20220420204223: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015304878700611203, 'time_algorithm_update': 0.002080459234326385, 'loss': 0.5255462796167406, 'time_step': 0.0023050024065860483, 'init_value': -0.7012829184532166, 'ave_value': 0.3157789701679805, 'soft_opc': nan} step=12728




2022-04-20 20:42.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.56 [info     ] FQE_20220420204223: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015837924424992052, 'time_algorithm_update': 0.0021222587241682897, 'loss': 0.5351002785420522, 'time_step': 0.0023529037486675172, 'init_value': -0.7896387577056885, 'ave_value': 0.4028922621966214, 'soft_opc': nan} step=13072




2022-04-20 20:42.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.57 [info     ] FQE_20220420204223: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001554017843202103, 'time_algorithm_update': 0.0020847036394962045, 'loss': 0.5376903571697429, 'time_step': 0.002311461886694265, 'init_value': -1.1568830013275146, 'ave_value': 0.26797177976181913, 'soft_opc': nan} step=13416




2022-04-20 20:42.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.58 [info     ] FQE_20220420204223: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015650308409402537, 'time_algorithm_update': 0.0021058210106783136, 'loss': 0.5453976805235238, 'time_step': 0.002335056316020877, 'init_value': -1.3203790187835693, 'ave_value': 0.1992503950389119, 'soft_opc': nan} step=13760




2022-04-20 20:42.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:42.59 [info     ] FQE_20220420204223: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015626050705133484, 'time_algorithm_update': 0.0021119970221852146, 'loss': 0.5532561646514508, 'time_step': 0.0023413619329763014, 'init_value': -1.4683661460876465, 'ave_value': 0.2656040355026185, 'soft_opc': nan} step=14104




2022-04-20 20:42.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.00 [info     ] FQE_20220420204223: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015766398851261584, 'time_algorithm_update': 0.002136804336725279, 'loss': 0.5387534518173874, 'time_step': 0.0023691896782364955, 'init_value': -1.7715561389923096, 'ave_value': 0.2761482816692945, 'soft_opc': nan} step=14448




2022-04-20 20:43.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.01 [info     ] FQE_20220420204223: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015597426614095999, 'time_algorithm_update': 0.002086157715597818, 'loss': 0.5487008404560647, 'time_step': 0.002311964367711267, 'init_value': -1.9666733741760254, 'ave_value': 0.18180391648196959, 'soft_opc': nan} step=14792




2022-04-20 20:43.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.02 [info     ] FQE_20220420204223: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015817755876585494, 'time_algorithm_update': 0.0021332453849703765, 'loss': 0.5501172619568574, 'time_step': 0.0023641842742298923, 'init_value': -2.4375059604644775, 'ave_value': 0.026574149703489498, 'soft_opc': nan} step=15136




2022-04-20 20:43.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.03 [info     ] FQE_20220420204223: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015745121379231298, 'time_algorithm_update': 0.0021450616592584653, 'loss': 0.5424699047417921, 'time_step': 0.0023746254832245585, 'init_value': -3.055284023284912, 'ave_value': -0.45623009335039905, 'soft_opc': nan} step=15480




2022-04-20 20:43.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.03 [info     ] FQE_20220420204223: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.000157124774400578, 'time_algorithm_update': 0.0021198336468186488, 'loss': 0.5396251296624541, 'time_step': 0.002350168172703233, 'init_value': -3.2411866188049316, 'ave_value': -0.4024560906605476, 'soft_opc': nan} step=15824




2022-04-20 20:43.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.04 [info     ] FQE_20220420204223: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015722249829491905, 'time_algorithm_update': 0.0021050309025964073, 'loss': 0.5305129690691396, 'time_step': 0.0023364937582681347, 'init_value': -3.7414684295654297, 'ave_value': -0.7055711440439966, 'soft_opc': nan} step=16168




2022-04-20 20:43.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.05 [info     ] FQE_20220420204223: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015607684157615485, 'time_algorithm_update': 0.0021314225917638736, 'loss': 0.5295059053108183, 'time_step': 0.0023636492185814435, 'init_value': -4.064108848571777, 'ave_value': -0.8702171831559558, 'soft_opc': nan} step=16512




2022-04-20 20:43.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.06 [info     ] FQE_20220420204223: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001558730768602948, 'time_algorithm_update': 0.002057900955510694, 'loss': 0.5274605329614133, 'time_step': 0.00228675784066666, 'init_value': -4.425532817840576, 'ave_value': -1.0794479079192152, 'soft_opc': nan} step=16856




2022-04-20 20:43.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:43.07 [info     ] FQE_20220420204223: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015810201334398846, 'time_algorithm_update': 0.002136937407560127, 'loss': 0.526995743706086, 'time_step': 0.002367960159168687, 'init_value': -5.060743808746338, 'ave_value': -1.447446051273948, 'soft_opc': nan} step=17200




2022-04-20 20:43.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204223/model_17200.pt
most optimal hyper params for cql at this point:  [0.0008288831520532805, 0.003934260713329634, 4.6195344911552404e-05, 7]
search iteration:  39
using hyper params:  [0.0043377297956846165, 0.004594351658022128, 9.794945016470112e-05, 1]
2022-04-20 20:43.07 [debug    ] RoundIterator is selected.
2022-04-20 20:43.07 [info     ] Directory is created at d3rlpy_logs/CQL_20220420204307
2022-04-20 20:43.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:43.07 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 20:43.07 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420204307/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.14 [info     ] CQL_20220420204307: epoch=1 step=342 epoch=1 metrics={'time_sample_batch': 0.00031302825749269006, 'time_algorithm_update': 0.018134950197231003, 'temp_loss': 4.620554194812886, 'temp': 0.9836180067201804, 'alpha_loss': -13.10701606984724, 'alpha': 1.0159025502483747, 'critic_loss': 18.21795411974366, 'actor_loss': -1.2119604970126996, 'time_step': 0.018547589318794116, 'td_error': 4.008076277202475, 'init_value': -1.4636212587356567, 'ave_value': 0.2690121893275965} step=342
2022-04-20 20:43.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_342.pt


Epoch 2/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.20 [info     ] CQL_20220420204307: epoch=2 step=684 epoch=2 metrics={'time_sample_batch': 0.0003045288442868238, 'time_algorithm_update': 0.01810313944231, 'temp_loss': 3.617967468953272, 'temp': 0.9542008432728505, 'alpha_loss': -5.65027450051224, 'alpha': 1.039713986435829, 'critic_loss': 25.31833301231875, 'actor_loss': 0.35853952944431217, 'time_step': 0.018508583481548824, 'td_error': 3.5100903232677885, 'init_value': -2.895637035369873, 'ave_value': -0.08872349047647403} step=684
2022-04-20 20:43.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_684.pt


Epoch 3/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.27 [info     ] CQL_20220420204307: epoch=3 step=1026 epoch=3 metrics={'time_sample_batch': 0.0003037585152520074, 'time_algorithm_update': 0.017634958551641097, 'temp_loss': 2.733764966328939, 'temp': 0.9296721838719664, 'alpha_loss': -1.0483136407578941, 'alpha': 1.051378403490747, 'critic_loss': 44.68870285658809, 'actor_loss': 1.7699850348875537, 'time_step': 0.01803680121550086, 'td_error': 5.285053472313124, 'init_value': -5.183452129364014, 'ave_value': -0.2449631720633657} step=1026
2022-04-20 20:43.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_1026.pt


Epoch 4/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.33 [info     ] CQL_20220420204307: epoch=4 step=1368 epoch=4 metrics={'time_sample_batch': 0.0003021697551883452, 'time_algorithm_update': 0.017667998347366064, 'temp_loss': 2.1658206498413755, 'temp': 0.9082957187242675, 'alpha_loss': 2.389972844994382, 'alpha': 1.047434909650457, 'critic_loss': 69.69530608640079, 'actor_loss': 3.141040563583374, 'time_step': 0.018067573943333318, 'td_error': 8.093184944726517, 'init_value': -7.730065822601318, 'ave_value': -1.4776925973527066} step=1368
2022-04-20 20:43.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_1368.pt


Epoch 5/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.39 [info     ] CQL_20220420204307: epoch=5 step=1710 epoch=5 metrics={'time_sample_batch': 0.0002972870542292009, 'time_algorithm_update': 0.01777062360306232, 'temp_loss': 1.7086800685402943, 'temp': 0.8892689535492345, 'alpha_loss': 4.996556734481053, 'alpha': 1.0249461942946005, 'critic_loss': 98.57723280700327, 'actor_loss': 4.414181436014454, 'time_step': 0.018164613790679397, 'td_error': 9.340269656103235, 'init_value': -10.597719192504883, 'ave_value': -1.939700258736675} step=1710
2022-04-20 20:43.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_1710.pt


Epoch 6/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.46 [info     ] CQL_20220420204307: epoch=6 step=2052 epoch=6 metrics={'time_sample_batch': 0.00030319174827887996, 'time_algorithm_update': 0.017711092156973498, 'temp_loss': 1.3415480142796945, 'temp': 0.8723874703833931, 'alpha_loss': 7.0119251270740355, 'alpha': 0.9888580408361223, 'critic_loss': 130.37722042150665, 'actor_loss': 5.847277971039041, 'time_step': 0.01811225023883128, 'td_error': 11.426912865291726, 'init_value': -11.971664428710938, 'ave_value': -1.7168770162801485} step=2052
2022-04-20 20:43.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_2052.pt


Epoch 7/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.52 [info     ] CQL_20220420204307: epoch=7 step=2394 epoch=7 metrics={'time_sample_batch': 0.0003023886541176958, 'time_algorithm_update': 0.017609297880652356, 'temp_loss': 1.0106877631437012, 'temp': 0.857641750783251, 'alpha_loss': 8.617803888711316, 'alpha': 0.9474746346125129, 'critic_loss': 164.02812266210367, 'actor_loss': 7.316794699395609, 'time_step': 0.018006373567190783, 'td_error': 16.09796356825495, 'init_value': -14.658198356628418, 'ave_value': -2.764812350611429} step=2394
2022-04-20 20:43.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_2394.pt


Epoch 8/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:43.59 [info     ] CQL_20220420204307: epoch=8 step=2736 epoch=8 metrics={'time_sample_batch': 0.00030491505449975443, 'time_algorithm_update': 0.017711474881534687, 'temp_loss': 0.7345830992474194, 'temp': 0.845188772992084, 'alpha_loss': 9.750861503924543, 'alpha': 0.9066489504443275, 'critic_loss': 200.90513735765603, 'actor_loss': 8.880045846191763, 'time_step': 0.018113718395344695, 'td_error': 21.306187740032048, 'init_value': -19.24074935913086, 'ave_value': -5.441953399723714} step=2736
2022-04-20 20:43.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_2736.pt


Epoch 9/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.05 [info     ] CQL_20220420204307: epoch=9 step=3078 epoch=9 metrics={'time_sample_batch': 0.00030210631632665444, 'time_algorithm_update': 0.01768725099619369, 'temp_loss': 0.4999594786885189, 'temp': 0.8348995506414894, 'alpha_loss': 10.842075975317703, 'alpha': 0.867907233579814, 'critic_loss': 242.32329171163994, 'actor_loss': 10.747495461625663, 'time_step': 0.01808538562373111, 'td_error': 18.744851445566024, 'init_value': -23.11768913269043, 'ave_value': -6.3605175308171695} step=3078
2022-04-20 20:44.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_3078.pt


Epoch 10/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.11 [info     ] CQL_20220420204307: epoch=10 step=3420 epoch=10 metrics={'time_sample_batch': 0.00029557141644215724, 'time_algorithm_update': 0.017567883458053858, 'temp_loss': 0.2823683635048481, 'temp': 0.8275170181578363, 'alpha_loss': 11.634777657469812, 'alpha': 0.8317789790923136, 'critic_loss': 285.25994471499797, 'actor_loss': 12.668350950319168, 'time_step': 0.017959082335756535, 'td_error': 33.90925121460004, 'init_value': -27.38739585876465, 'ave_value': -8.127418653900559} step=3420
2022-04-20 20:44.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_3420.pt


Epoch 11/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.18 [info     ] CQL_20220420204307: epoch=11 step=3762 epoch=11 metrics={'time_sample_batch': 0.000306212414077848, 'time_algorithm_update': 0.017928159027768856, 'temp_loss': 0.14830194528579538, 'temp': 0.8231600789647353, 'alpha_loss': 12.361474829110486, 'alpha': 0.7978989699779199, 'critic_loss': 334.95526756598935, 'actor_loss': 14.962209258163185, 'time_step': 0.018332875262924105, 'td_error': 38.299016408995655, 'init_value': -32.899681091308594, 'ave_value': -11.493016170988211} step=3762
2022-04-20 20:44.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_3762.pt


Epoch 12/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.24 [info     ] CQL_20220420204307: epoch=12 step=4104 epoch=12 metrics={'time_sample_batch': 0.000302297330042075, 'time_algorithm_update': 0.01785783530675877, 'temp_loss': -0.003042533650602165, 'temp': 0.8211876499025446, 'alpha_loss': 12.754561909458094, 'alpha': 0.7667663005011821, 'critic_loss': 387.77006093521567, 'actor_loss': 17.351393273002223, 'time_step': 0.018260066969352857, 'td_error': 53.31393363392533, 'init_value': -34.73249053955078, 'ave_value': -11.827546357453407} step=4104
2022-04-20 20:44.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_4104.pt


Epoch 13/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.31 [info     ] CQL_20220420204307: epoch=13 step=4446 epoch=13 metrics={'time_sample_batch': 0.00030010415796647994, 'time_algorithm_update': 0.01788270822045399, 'temp_loss': -0.12540621845302177, 'temp': 0.8230264988559032, 'alpha_loss': 13.141981936337656, 'alpha': 0.7375763717450594, 'critic_loss': 444.27543782909015, 'actor_loss': 19.87230438377425, 'time_step': 0.018276995385599414, 'td_error': 48.40588150530521, 'init_value': -40.440826416015625, 'ave_value': -14.686627881978009} step=4446
2022-04-20 20:44.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_4446.pt


Epoch 14/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.37 [info     ] CQL_20220420204307: epoch=14 step=4788 epoch=14 metrics={'time_sample_batch': 0.00030591543654949347, 'time_algorithm_update': 0.017768936547619556, 'temp_loss': -0.17929060971200989, 'temp': 0.8289646515023639, 'alpha_loss': 12.929365902616267, 'alpha': 0.710441181534215, 'critic_loss': 501.44392546715096, 'actor_loss': 22.470306530333403, 'time_step': 0.018173523116530033, 'td_error': 56.921179048735084, 'init_value': -44.507877349853516, 'ave_value': -15.09230070461561} step=4788
2022-04-20 20:44.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_4788.pt


Epoch 15/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.44 [info     ] CQL_20220420204307: epoch=15 step=5130 epoch=15 metrics={'time_sample_batch': 0.00030317432002017374, 'time_algorithm_update': 0.01767204797755905, 'temp_loss': -0.21627731319538682, 'temp': 0.8372855364230641, 'alpha_loss': 12.58262876878705, 'alpha': 0.6856210475776627, 'critic_loss': 556.7623503389415, 'actor_loss': 24.787013843045596, 'time_step': 0.01807205927999396, 'td_error': 52.685373970265246, 'init_value': -52.02540969848633, 'ave_value': -18.914011267794145} step=5130
2022-04-20 20:44.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_5130.pt


Epoch 16/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.50 [info     ] CQL_20220420204307: epoch=16 step=5472 epoch=16 metrics={'time_sample_batch': 0.00030015713987294694, 'time_algorithm_update': 0.017968199406450953, 'temp_loss': -0.2696309792073934, 'temp': 0.8483619137465606, 'alpha_loss': 12.621534760235345, 'alpha': 0.6623855088537897, 'critic_loss': 613.525596306338, 'actor_loss': 27.518742299219323, 'time_step': 0.018366296388949568, 'td_error': 71.91101554915481, 'init_value': -59.79253387451172, 'ave_value': -21.3846364737094} step=5472
2022-04-20 20:44.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_5472.pt


Epoch 17/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:44.57 [info     ] CQL_20220420204307: epoch=17 step=5814 epoch=17 metrics={'time_sample_batch': 0.0003087248718529417, 'time_algorithm_update': 0.018467207401119477, 'temp_loss': -0.2955363773528421, 'temp': 0.8639313102814189, 'alpha_loss': 12.452646489728961, 'alpha': 0.6399730677493134, 'critic_loss': 674.6127139085914, 'actor_loss': 30.33081295058044, 'time_step': 0.018875375128628916, 'td_error': 87.57540270545067, 'init_value': -67.19523620605469, 'ave_value': -24.569062272176012} step=5814
2022-04-20 20:44.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_5814.pt


Epoch 18/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.04 [info     ] CQL_20220420204307: epoch=18 step=6156 epoch=18 metrics={'time_sample_batch': 0.0003098395832797937, 'time_algorithm_update': 0.019380437700371993, 'temp_loss': -0.296324126122857, 'temp': 0.8807932973256585, 'alpha_loss': 12.393147111636155, 'alpha': 0.618630277831652, 'critic_loss': 740.8276888306378, 'actor_loss': 33.429028455276935, 'time_step': 0.019793116558364958, 'td_error': 67.92368995148493, 'init_value': -72.62901306152344, 'ave_value': -26.94841120482028} step=6156
2022-04-20 20:45.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_6156.pt


Epoch 19/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.11 [info     ] CQL_20220420204307: epoch=19 step=6498 epoch=19 metrics={'time_sample_batch': 0.00031390733886183355, 'time_algorithm_update': 0.019659429962872066, 'temp_loss': -0.2952594377797598, 'temp': 0.8997488224018387, 'alpha_loss': 11.981718244608382, 'alpha': 0.5982905879006748, 'critic_loss': 806.5110379046167, 'actor_loss': 36.40324904904728, 'time_step': 0.020074596181947586, 'td_error': 106.0525142994013, 'init_value': -78.61607360839844, 'ave_value': -30.00371236959556} step=6498
2022-04-20 20:45.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_6498.pt


Epoch 20/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.18 [info     ] CQL_20220420204307: epoch=20 step=6840 epoch=20 metrics={'time_sample_batch': 0.00030421931841220074, 'time_algorithm_update': 0.018347938158358747, 'temp_loss': -0.29720654752519393, 'temp': 0.9185425944147054, 'alpha_loss': 11.61214062066106, 'alpha': 0.5792479839241296, 'critic_loss': 868.378590544762, 'actor_loss': 39.20343776613648, 'time_step': 0.018751561293127942, 'td_error': 75.2638415185733, 'init_value': -87.784912109375, 'ave_value': -33.03389839228209} step=6840
2022-04-20 20:45.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_6840.pt


Epoch 21/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.25 [info     ] CQL_20220420204307: epoch=21 step=7182 epoch=21 metrics={'time_sample_batch': 0.00030341622425101656, 'time_algorithm_update': 0.018274478047911883, 'temp_loss': -0.28439228580883374, 'temp': 0.9392959442403581, 'alpha_loss': 11.68431556015684, 'alpha': 0.5605703994893191, 'critic_loss': 933.9157247264483, 'actor_loss': 42.218680526778016, 'time_step': 0.018676665094163682, 'td_error': 131.06256538678943, 'init_value': -93.78254699707031, 'ave_value': -35.53041627883911} step=7182
2022-04-20 20:45.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_7182.pt


Epoch 22/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.31 [info     ] CQL_20220420204307: epoch=22 step=7524 epoch=22 metrics={'time_sample_batch': 0.00029980927182917007, 'time_algorithm_update': 0.018164935864900287, 'temp_loss': -0.2955016753097114, 'temp': 0.9599845057342484, 'alpha_loss': 11.550958222115947, 'alpha': 0.5425109206235896, 'critic_loss': 996.4924869648894, 'actor_loss': 45.12735366821289, 'time_step': 0.01855926973777905, 'td_error': 101.30518873083011, 'init_value': -98.73106384277344, 'ave_value': -36.59127196408607} step=7524
2022-04-20 20:45.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_7524.pt


Epoch 23/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.38 [info     ] CQL_20220420204307: epoch=23 step=7866 epoch=23 metrics={'time_sample_batch': 0.00030007069570976393, 'time_algorithm_update': 0.01778839694129096, 'temp_loss': -0.2954839435977894, 'temp': 0.9826486326797664, 'alpha_loss': 11.650690227921245, 'alpha': 0.5247907288241804, 'critic_loss': 1063.046999747293, 'actor_loss': 48.282669346234954, 'time_step': 0.01818553955234282, 'td_error': 104.54597604385243, 'init_value': -107.49497985839844, 'ave_value': -40.57628317139707} step=7866
2022-04-20 20:45.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_7866.pt


Epoch 24/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.44 [info     ] CQL_20220420204307: epoch=24 step=8208 epoch=24 metrics={'time_sample_batch': 0.0002970228418272141, 'time_algorithm_update': 0.017726675111647935, 'temp_loss': -0.2641766883258583, 'temp': 1.0043608256947925, 'alpha_loss': 11.819979395782738, 'alpha': 0.5073476073860425, 'critic_loss': 1134.047405756008, 'actor_loss': 51.40257394801804, 'time_step': 0.018119869176407306, 'td_error': 154.52198686236534, 'init_value': -118.65301513671875, 'ave_value': -44.675475718217925} step=8208
2022-04-20 20:45.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_8208.pt


Epoch 25/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.51 [info     ] CQL_20220420204307: epoch=25 step=8550 epoch=25 metrics={'time_sample_batch': 0.0003002707721197117, 'time_algorithm_update': 0.018391395870007966, 'temp_loss': -0.22246353305353408, 'temp': 1.0239854601391576, 'alpha_loss': 12.261963676988033, 'alpha': 0.49017700282802357, 'critic_loss': 1205.2614249960025, 'actor_loss': 54.80188274941249, 'time_step': 0.0187872752808688, 'td_error': 175.6489748684901, 'init_value': -122.7525863647461, 'ave_value': -46.89187246861221} step=8550
2022-04-20 20:45.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_8550.pt


Epoch 26/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:45.57 [info     ] CQL_20220420204307: epoch=26 step=8892 epoch=26 metrics={'time_sample_batch': 0.00029976256409583734, 'time_algorithm_update': 0.01820267501630281, 'temp_loss': -0.21189495342734613, 'temp': 1.039825906181893, 'alpha_loss': 12.040768456040768, 'alpha': 0.4735536009770388, 'critic_loss': 1284.6503918742576, 'actor_loss': 58.33747569301672, 'time_step': 0.018596718185826352, 'td_error': 109.09548358316097, 'init_value': -129.54434204101562, 'ave_value': -50.48177635837246} step=8892
2022-04-20 20:45.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_8892.pt


Epoch 27/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.04 [info     ] CQL_20220420204307: epoch=27 step=9234 epoch=27 metrics={'time_sample_batch': 0.0003026472894768966, 'time_algorithm_update': 0.017895179882384184, 'temp_loss': -0.18141674150151815, 'temp': 1.0566185158595705, 'alpha_loss': 11.460587278444168, 'alpha': 0.4584634910550034, 'critic_loss': 1361.431749488875, 'actor_loss': 61.4283569402862, 'time_step': 0.018297389236807127, 'td_error': 217.64301217096124, 'init_value': -140.5879364013672, 'ave_value': -53.893312944281206} step=9234
2022-04-20 20:46.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_9234.pt


Epoch 28/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.10 [info     ] CQL_20220420204307: epoch=28 step=9576 epoch=28 metrics={'time_sample_batch': 0.00030172847167790287, 'time_algorithm_update': 0.01791094966799195, 'temp_loss': -0.10047265417242085, 'temp': 1.0708845979289006, 'alpha_loss': 10.562460133903905, 'alpha': 0.4438291065699873, 'critic_loss': 1437.8458137735288, 'actor_loss': 64.60704242416293, 'time_step': 0.018310747648540297, 'td_error': 219.02407211581874, 'init_value': -142.63406372070312, 'ave_value': -54.716244250106385} step=9576
2022-04-20 20:46.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_9576.pt


Epoch 29/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.17 [info     ] CQL_20220420204307: epoch=29 step=9918 epoch=29 metrics={'time_sample_batch': 0.00030315619463111923, 'time_algorithm_update': 0.018069702979416874, 'temp_loss': -0.07763705943977972, 'temp': 1.0794779679231477, 'alpha_loss': 10.630839646211145, 'alpha': 0.43018656820930234, 'critic_loss': 1510.345323707625, 'actor_loss': 67.7293980247096, 'time_step': 0.018472623406795032, 'td_error': 285.17646548115164, 'init_value': -152.14071655273438, 'ave_value': -58.72705369137429} step=9918
2022-04-20 20:46.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_9918.pt


Epoch 30/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.23 [info     ] CQL_20220420204307: epoch=30 step=10260 epoch=30 metrics={'time_sample_batch': 0.0003030446537753992, 'time_algorithm_update': 0.01807884932958592, 'temp_loss': -0.0753543428771677, 'temp': 1.0864217961043643, 'alpha_loss': 10.412504175253082, 'alpha': 0.41684138147454514, 'critic_loss': 1585.7529650236431, 'actor_loss': 71.03231161240248, 'time_step': 0.018482376260367053, 'td_error': 277.1183539022946, 'init_value': -161.3791961669922, 'ave_value': -60.92711021158609} step=10260
2022-04-20 20:46.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_10260.pt


Epoch 31/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.30 [info     ] CQL_20220420204307: epoch=31 step=10602 epoch=31 metrics={'time_sample_batch': 0.00030098951350875764, 'time_algorithm_update': 0.01782808178349545, 'temp_loss': -0.025566143305068127, 'temp': 1.0909582599561813, 'alpha_loss': 10.04837809529221, 'alpha': 0.40388323401498516, 'critic_loss': 1659.6849640071043, 'actor_loss': 74.22604566429094, 'time_step': 0.018227860941524395, 'td_error': 289.5439671101725, 'init_value': -169.586181640625, 'ave_value': -64.6272249679415} step=10602
2022-04-20 20:46.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_10602.pt


Epoch 32/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.36 [info     ] CQL_20220420204307: epoch=32 step=10944 epoch=32 metrics={'time_sample_batch': 0.00029671052743119805, 'time_algorithm_update': 0.01770249444838853, 'temp_loss': -0.0002455447217700077, 'temp': 1.093371043428343, 'alpha_loss': 9.66426510281033, 'alpha': 0.3916243199716535, 'critic_loss': 1731.6519793237162, 'actor_loss': 77.22793710719772, 'time_step': 0.01809663312476978, 'td_error': 305.721966016097, 'init_value': -175.44740295410156, 'ave_value': -67.86321435307597} step=10944
2022-04-20 20:46.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_10944.pt


Epoch 33/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.43 [info     ] CQL_20220420204307: epoch=33 step=11286 epoch=33 metrics={'time_sample_batch': 0.0003031771085415667, 'time_algorithm_update': 0.017969999397010133, 'temp_loss': -0.014200498518800874, 'temp': 1.0952384161670305, 'alpha_loss': 10.105863494482653, 'alpha': 0.3792986207538181, 'critic_loss': 1808.5180389225832, 'actor_loss': 80.62512949893349, 'time_step': 0.01837170960610373, 'td_error': 283.0998996520508, 'init_value': -182.56588745117188, 'ave_value': -70.90526967668319} step=11286
2022-04-20 20:46.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_11286.pt


Epoch 34/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.49 [info     ] CQL_20220420204307: epoch=34 step=11628 epoch=34 metrics={'time_sample_batch': 0.00029846659877844024, 'time_algorithm_update': 0.01778345010433978, 'temp_loss': -0.023300581736958516, 'temp': 1.096231708401128, 'alpha_loss': 9.585820001468324, 'alpha': 0.3675704261190013, 'critic_loss': 1878.4722422103437, 'actor_loss': 83.3922029461777, 'time_step': 0.018178792027702107, 'td_error': 356.3275730625838, 'init_value': -186.34188842773438, 'ave_value': -72.7461137579368} step=11628
2022-04-20 20:46.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_11628.pt


Epoch 35/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:46.56 [info     ] CQL_20220420204307: epoch=35 step=11970 epoch=35 metrics={'time_sample_batch': 0.0002986868919684873, 'time_algorithm_update': 0.017787102370234262, 'temp_loss': -0.03673124958200064, 'temp': 1.0985680686102972, 'alpha_loss': 10.32383578423171, 'alpha': 0.3555889605430135, 'critic_loss': 1947.512295550073, 'actor_loss': 86.58614710757607, 'time_step': 0.018184507102297062, 'td_error': 259.4853657190459, 'init_value': -198.2908477783203, 'ave_value': -77.34517750955916} step=11970
2022-04-20 20:46.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_11970.pt


Epoch 36/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.03 [info     ] CQL_20220420204307: epoch=36 step=12312 epoch=36 metrics={'time_sample_batch': 0.000303381367733604, 'time_algorithm_update': 0.0186515586418018, 'temp_loss': -0.05433903578995613, 'temp': 1.1033580041768258, 'alpha_loss': 8.753878378728677, 'alpha': 0.34445056702658444, 'critic_loss': 2017.4165563750685, 'actor_loss': 89.39683059781615, 'time_step': 0.019052300536841676, 'td_error': 263.4876358244429, 'init_value': -202.02230834960938, 'ave_value': -79.30464787481067} step=12312
2022-04-20 20:47.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_12312.pt


Epoch 37/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.09 [info     ] CQL_20220420204307: epoch=37 step=12654 epoch=37 metrics={'time_sample_batch': 0.0003022046117057577, 'time_algorithm_update': 0.017863155805576614, 'temp_loss': 0.030575278743404394, 'temp': 1.1054920146339817, 'alpha_loss': 8.316786080075984, 'alpha': 0.33453014998408087, 'critic_loss': 2085.0769228572735, 'actor_loss': 92.33255459969504, 'time_step': 0.01825911329503645, 'td_error': 241.51700021935773, 'init_value': -212.78494262695312, 'ave_value': -82.43009868722778} step=12654
2022-04-20 20:47.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_12654.pt


Epoch 38/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.16 [info     ] CQL_20220420204307: epoch=38 step=12996 epoch=38 metrics={'time_sample_batch': 0.0003015625546550193, 'time_algorithm_update': 0.017985839592783076, 'temp_loss': -0.06777451171577849, 'temp': 1.1052893184081853, 'alpha_loss': 8.402073380542777, 'alpha': 0.32470111662184287, 'critic_loss': 2145.181295472976, 'actor_loss': 94.82251346878141, 'time_step': 0.01838288390845583, 'td_error': 354.3947999411011, 'init_value': -215.3174591064453, 'ave_value': -84.5754708973137} step=12996
2022-04-20 20:47.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_12996.pt


Epoch 39/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.22 [info     ] CQL_20220420204307: epoch=39 step=13338 epoch=39 metrics={'time_sample_batch': 0.00030243187619928726, 'time_algorithm_update': 0.01846647680851451, 'temp_loss': -0.028179575444052096, 'temp': 1.1109222574540747, 'alpha_loss': 9.146469311407435, 'alpha': 0.31421878620197896, 'critic_loss': 2210.588518109238, 'actor_loss': 97.79131020439996, 'time_step': 0.018867164327387224, 'td_error': 314.87943874488036, 'init_value': -223.96560668945312, 'ave_value': -87.4202536753169} step=13338
2022-04-20 20:47.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_13338.pt


Epoch 40/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.29 [info     ] CQL_20220420204307: epoch=40 step=13680 epoch=40 metrics={'time_sample_batch': 0.00030340925294753405, 'time_algorithm_update': 0.01843248891551592, 'temp_loss': -0.08176629966375423, 'temp': 1.116113034953848, 'alpha_loss': 8.553781056264688, 'alpha': 0.3044142296091158, 'critic_loss': 2277.2336796989216, 'actor_loss': 100.66225843819959, 'time_step': 0.018834277203208523, 'td_error': 462.88791802358554, 'init_value': -232.0176239013672, 'ave_value': -91.46425728359738} step=13680
2022-04-20 20:47.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_13680.pt


Epoch 41/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.36 [info     ] CQL_20220420204307: epoch=41 step=14022 epoch=41 metrics={'time_sample_batch': 0.0003083100792957328, 'time_algorithm_update': 0.018241604169209797, 'temp_loss': -0.03743365718520176, 'temp': 1.1237747477509126, 'alpha_loss': 8.767483463064272, 'alpha': 0.2945072914947543, 'critic_loss': 2345.6812419333655, 'actor_loss': 103.60610451056944, 'time_step': 0.018654488680655498, 'td_error': 521.4396024246345, 'init_value': -242.03128051757812, 'ave_value': -95.01455639242052} step=14022
2022-04-20 20:47.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_14022.pt


Epoch 42/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.42 [info     ] CQL_20220420204307: epoch=42 step=14364 epoch=42 metrics={'time_sample_batch': 0.00030462923105697184, 'time_algorithm_update': 0.018324483904922216, 'temp_loss': 0.01206714289578778, 'temp': 1.125488169012014, 'alpha_loss': 9.098018554219028, 'alpha': 0.2847819024184991, 'critic_loss': 2415.274844877901, 'actor_loss': 106.8349383616308, 'time_step': 0.018725913867615816, 'td_error': 521.7754645696953, 'init_value': -243.2427215576172, 'ave_value': -95.22171155673963} step=14364
2022-04-20 20:47.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_14364.pt


Epoch 43/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.49 [info     ] CQL_20220420204307: epoch=43 step=14706 epoch=43 metrics={'time_sample_batch': 0.000304236746670907, 'time_algorithm_update': 0.01808229036498488, 'temp_loss': 0.003037635836074924, 'temp': 1.1227633914752313, 'alpha_loss': 8.74002854865894, 'alpha': 0.27541324391699673, 'critic_loss': 2494.066317374246, 'actor_loss': 110.11295619763825, 'time_step': 0.01848662457270929, 'td_error': 517.7216557409573, 'init_value': -255.91677856445312, 'ave_value': -100.57069572740846} step=14706
2022-04-20 20:47.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_14706.pt


Epoch 44/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:47.55 [info     ] CQL_20220420204307: epoch=44 step=15048 epoch=44 metrics={'time_sample_batch': 0.0003025559654012758, 'time_algorithm_update': 0.01829686569191559, 'temp_loss': -0.03164447818430718, 'temp': 1.125490256917407, 'alpha_loss': 9.104450266263639, 'alpha': 0.26631734428698556, 'critic_loss': 2568.87574919762, 'actor_loss': 113.45326719228287, 'time_step': 0.018698925860444006, 'td_error': 508.8212589229357, 'init_value': -261.02337646484375, 'ave_value': -102.02470501887906} step=15048
2022-04-20 20:47.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_15048.pt


Epoch 45/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.02 [info     ] CQL_20220420204307: epoch=45 step=15390 epoch=45 metrics={'time_sample_batch': 0.00030177169375949435, 'time_algorithm_update': 0.018143962698373182, 'temp_loss': 0.011913746862252902, 'temp': 1.1258513958830583, 'alpha_loss': 8.372472463295473, 'alpha': 0.2575096344215828, 'critic_loss': 2648.708382946706, 'actor_loss': 116.82462692260742, 'time_step': 0.01854597406777722, 'td_error': 520.9199841367914, 'init_value': -266.10601806640625, 'ave_value': -103.72507648258596} step=15390
2022-04-20 20:48.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_15390.pt


Epoch 46/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.09 [info     ] CQL_20220420204307: epoch=46 step=15732 epoch=46 metrics={'time_sample_batch': 0.0003005119792202063, 'time_algorithm_update': 0.01818371516222145, 'temp_loss': 0.03132604477567631, 'temp': 1.1233915756320396, 'alpha_loss': 6.911024583013434, 'alpha': 0.25014302430794255, 'critic_loss': 2717.287170767087, 'actor_loss': 119.52826367205347, 'time_step': 0.01858278743007727, 'td_error': 405.2969784141221, 'init_value': -271.83990478515625, 'ave_value': -108.03602106003073} step=15732
2022-04-20 20:48.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_15732.pt


Epoch 47/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.16 [info     ] CQL_20220420204307: epoch=47 step=16074 epoch=47 metrics={'time_sample_batch': 0.00031607332285384684, 'time_algorithm_update': 0.020865666238885177, 'temp_loss': 0.013792029652766317, 'temp': 1.1214517849927756, 'alpha_loss': 7.510198390274717, 'alpha': 0.243217521025772, 'critic_loss': 2780.89678419682, 'actor_loss': 122.22401834231371, 'time_step': 0.021290406846163564, 'td_error': 506.7732232060448, 'init_value': -285.3987731933594, 'ave_value': -112.64392141128445} step=16074
2022-04-20 20:48.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_16074.pt


Epoch 48/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.24 [info     ] CQL_20220420204307: epoch=48 step=16416 epoch=48 metrics={'time_sample_batch': 0.0003298339787979572, 'time_algorithm_update': 0.021731015534428823, 'temp_loss': 0.1751979821748291, 'temp': 1.1121259314274927, 'alpha_loss': 7.56456510504784, 'alpha': 0.23557430680035152, 'critic_loss': 2878.4202131304824, 'actor_loss': 126.18082211589255, 'time_step': 0.022172703380473176, 'td_error': 728.6052649161016, 'init_value': -289.5535583496094, 'ave_value': -115.2508102534698} step=16416
2022-04-20 20:48.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_16416.pt


Epoch 49/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.31 [info     ] CQL_20220420204307: epoch=49 step=16758 epoch=49 metrics={'time_sample_batch': 0.00033139206512629635, 'time_algorithm_update': 0.020534866037424545, 'temp_loss': 0.10303773810509702, 'temp': 1.0968167987483286, 'alpha_loss': 7.21294179157904, 'alpha': 0.22839905800875168, 'critic_loss': 2965.3794459292762, 'actor_loss': 129.64298783687124, 'time_step': 0.020976642419023122, 'td_error': 550.4662908019177, 'init_value': -297.96551513671875, 'ave_value': -120.61910918404391} step=16758
2022-04-20 20:48.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_16758.pt


Epoch 50/50:   0%|          | 0/342 [00:00<?, ?it/s]

2022-04-20 20:48.39 [info     ] CQL_20220420204307: epoch=50 step=17100 epoch=50 metrics={'time_sample_batch': 0.00032591122632835343, 'time_algorithm_update': 0.020304343156647264, 'temp_loss': 0.06076712808326671, 'temp': 1.0901636578883345, 'alpha_loss': 6.924559264155159, 'alpha': 0.22141623392439724, 'critic_loss': 3028.920592904788, 'actor_loss': 132.32055599368803, 'time_step': 0.020735987445764374, 'td_error': 544.8595407397763, 'init_value': -298.84112548828125, 'ave_value': -120.12089599593266} step=17100
2022-04-20 20:48.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420204307/model_17100.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:48.39 [info     ] FQE_20220420204839: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015092901436679335, 'time_algorithm_update': 0.002319040068660874, 'loss': 0.006978024377947651, 'time_step': 0.0025438213922891273, 'init_value': -0.47370588779449463, 'ave_value': -0.43142399990612323, 'soft_opc': nan} step=166




2022-04-20 20:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.40 [info     ] FQE_20220420204839: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.000157372061028538, 'time_algorithm_update': 0.0023714433233421968, 'loss': 0.005987481239352779, 'time_step': 0.002605360674570842, 'init_value': -0.5801640748977661, 'ave_value': -0.49386428806695853, 'soft_opc': nan} step=332




2022-04-20 20:48.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.40 [info     ] FQE_20220420204839: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001522575516298593, 'time_algorithm_update': 0.002296194972762142, 'loss': 0.005608914676683405, 'time_step': 0.0025177088128515036, 'init_value': -0.6355797052383423, 'ave_value': -0.5209320972094664, 'soft_opc': nan} step=498




2022-04-20 20:48.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.41 [info     ] FQE_20220420204839: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015265683093702937, 'time_algorithm_update': 0.0021839443459568254, 'loss': 0.005474952920564416, 'time_step': 0.0024091565465352623, 'init_value': -0.6931172609329224, 'ave_value': -0.556927851446577, 'soft_opc': nan} step=664




2022-04-20 20:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.41 [info     ] FQE_20220420204839: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001546532274728798, 'time_algorithm_update': 0.002280238163040345, 'loss': 0.005229016164239182, 'time_step': 0.002507396491177111, 'init_value': -0.7503979206085205, 'ave_value': -0.5998060235904681, 'soft_opc': nan} step=830




2022-04-20 20:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.42 [info     ] FQE_20220420204839: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015431139842573418, 'time_algorithm_update': 0.002250183059508542, 'loss': 0.005046369188117334, 'time_step': 0.0024717888200139425, 'init_value': -0.7751899361610413, 'ave_value': -0.6059625867854904, 'soft_opc': nan} step=996




2022-04-20 20:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.42 [info     ] FQE_20220420204839: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015457566962184677, 'time_algorithm_update': 0.002269667315195842, 'loss': 0.004807320141502802, 'time_step': 0.00249474163515022, 'init_value': -0.8064567446708679, 'ave_value': -0.6331498469318355, 'soft_opc': nan} step=1162




2022-04-20 20:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.43 [info     ] FQE_20220420204839: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014855057360178017, 'time_algorithm_update': 0.002077352569763919, 'loss': 0.004519407856222017, 'time_step': 0.0023042480629610726, 'init_value': -0.883764386177063, 'ave_value': -0.6834709267761256, 'soft_opc': nan} step=1328




2022-04-20 20:48.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.43 [info     ] FQE_20220420204839: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014944679765816195, 'time_algorithm_update': 0.0022066802863615104, 'loss': 0.004408674955603678, 'time_step': 0.002431167177407138, 'init_value': -0.8869082927703857, 'ave_value': -0.6841575974883797, 'soft_opc': nan} step=1494




2022-04-20 20:48.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.44 [info     ] FQE_20220420204839: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014779797519545956, 'time_algorithm_update': 0.0021020188389054262, 'loss': 0.004291998397794564, 'time_step': 0.0023196045174656145, 'init_value': -0.9411126375198364, 'ave_value': -0.7356394714264719, 'soft_opc': nan} step=1660




2022-04-20 20:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.44 [info     ] FQE_20220420204839: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001517864594976586, 'time_algorithm_update': 0.0022035765360636883, 'loss': 0.00434010886132762, 'time_step': 0.002429798424962055, 'init_value': -0.9921475052833557, 'ave_value': -0.777451146212784, 'soft_opc': nan} step=1826




2022-04-20 20:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.44 [info     ] FQE_20220420204839: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015052829880312266, 'time_algorithm_update': 0.002171013728681817, 'loss': 0.004169312847851136, 'time_step': 0.0023886784013495387, 'init_value': -1.016908884048462, 'ave_value': -0.7988781330284772, 'soft_opc': nan} step=1992




2022-04-20 20:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.45 [info     ] FQE_20220420204839: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015063889055366977, 'time_algorithm_update': 0.002168957009372941, 'loss': 0.004460581819560513, 'time_step': 0.002392439957124641, 'init_value': -1.1164159774780273, 'ave_value': -0.8917876310847901, 'soft_opc': nan} step=2158




2022-04-20 20:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.45 [info     ] FQE_20220420204839: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015575196369584785, 'time_algorithm_update': 0.002247085054236722, 'loss': 0.0045390951668798744, 'time_step': 0.0024743712092020424, 'init_value': -1.1882801055908203, 'ave_value': -0.9496940254359632, 'soft_opc': nan} step=2324




2022-04-20 20:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.46 [info     ] FQE_20220420204839: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014851035841976303, 'time_algorithm_update': 0.0021459453077201382, 'loss': 0.004815436307767249, 'time_step': 0.0023686627307570123, 'init_value': -1.1916608810424805, 'ave_value': -0.950808802461839, 'soft_opc': nan} step=2490




2022-04-20 20:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.46 [info     ] FQE_20220420204839: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014801341367055135, 'time_algorithm_update': 0.0021296940654157155, 'loss': 0.00507784862820261, 'time_step': 0.002347658915692065, 'init_value': -1.267615556716919, 'ave_value': -1.0063093012047781, 'soft_opc': nan} step=2656




2022-04-20 20:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.47 [info     ] FQE_20220420204839: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015242128487092903, 'time_algorithm_update': 0.0021757519388773353, 'loss': 0.005231977666234486, 'time_step': 0.0023979465645479867, 'init_value': -1.316847562789917, 'ave_value': -1.063218682769451, 'soft_opc': nan} step=2822




2022-04-20 20:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.47 [info     ] FQE_20220420204839: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015030998781502964, 'time_algorithm_update': 0.0021332358739462242, 'loss': 0.005316347492499703, 'time_step': 0.002354323145854904, 'init_value': -1.320266604423523, 'ave_value': -1.0593567029380047, 'soft_opc': nan} step=2988




2022-04-20 20:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.48 [info     ] FQE_20220420204839: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014704537678913898, 'time_algorithm_update': 0.0021030529435858668, 'loss': 0.005783647664889963, 'time_step': 0.0023234493761177524, 'init_value': -1.4372138977050781, 'ave_value': -1.1422921089438705, 'soft_opc': nan} step=3154




2022-04-20 20:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.48 [info     ] FQE_20220420204839: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014966510864625494, 'time_algorithm_update': 0.002152503254901932, 'loss': 0.005861348383470876, 'time_step': 0.002369577626147902, 'init_value': -1.46684992313385, 'ave_value': -1.1482751290808926, 'soft_opc': nan} step=3320




2022-04-20 20:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.48 [info     ] FQE_20220420204839: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001491494925625353, 'time_algorithm_update': 0.0022067980593945607, 'loss': 0.006322199044642544, 'time_step': 0.0024311269622251212, 'init_value': -1.4894037246704102, 'ave_value': -1.1752955396164646, 'soft_opc': nan} step=3486




2022-04-20 20:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.49 [info     ] FQE_20220420204839: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015239686851041862, 'time_algorithm_update': 0.002127577023333814, 'loss': 0.006861761843516345, 'time_step': 0.002352654215801193, 'init_value': -1.5588409900665283, 'ave_value': -1.2475115793849434, 'soft_opc': nan} step=3652




2022-04-20 20:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.49 [info     ] FQE_20220420204839: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001510812575558582, 'time_algorithm_update': 0.0021577628262071723, 'loss': 0.007422458555785019, 'time_step': 0.0023755294730864376, 'init_value': -1.5710231065750122, 'ave_value': -1.2413825902584437, 'soft_opc': nan} step=3818




2022-04-20 20:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.50 [info     ] FQE_20220420204839: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001617239182253918, 'time_algorithm_update': 0.002384300691535674, 'loss': 0.007779851951355855, 'time_step': 0.002622898802699813, 'init_value': -1.6286014318466187, 'ave_value': -1.2736706125977877, 'soft_opc': nan} step=3984




2022-04-20 20:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.50 [info     ] FQE_20220420204839: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001480062323880483, 'time_algorithm_update': 0.002110400831842997, 'loss': 0.008522693301321018, 'time_step': 0.00232811577348824, 'init_value': -1.7256590127944946, 'ave_value': -1.3619448575350614, 'soft_opc': nan} step=4150




2022-04-20 20:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.51 [info     ] FQE_20220420204839: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015249597020896086, 'time_algorithm_update': 0.0021765504974916756, 'loss': 0.008589783658873829, 'time_step': 0.0023966941488794535, 'init_value': -1.8552508354187012, 'ave_value': -1.4812912708534314, 'soft_opc': nan} step=4316




2022-04-20 20:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.51 [info     ] FQE_20220420204839: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014971394136727574, 'time_algorithm_update': 0.0021476803055728772, 'loss': 0.0100022485986049, 'time_step': 0.0023746246314910523, 'init_value': -1.8663297891616821, 'ave_value': -1.4788427150867007, 'soft_opc': nan} step=4482




2022-04-20 20:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.52 [info     ] FQE_20220420204839: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001552722540246435, 'time_algorithm_update': 0.0022725326469145626, 'loss': 0.010511852015393326, 'time_step': 0.0024982145033686995, 'init_value': -1.945805311203003, 'ave_value': -1.5438481651515037, 'soft_opc': nan} step=4648




2022-04-20 20:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.52 [info     ] FQE_20220420204839: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014937498483313136, 'time_algorithm_update': 0.002210026764007936, 'loss': 0.011173399537661484, 'time_step': 0.002428785864129124, 'init_value': -2.0027549266815186, 'ave_value': -1.617971380289879, 'soft_opc': nan} step=4814




2022-04-20 20:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.52 [info     ] FQE_20220420204839: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015139005270348974, 'time_algorithm_update': 0.0022485758884843573, 'loss': 0.011789474587926799, 'time_step': 0.0024741026292364283, 'init_value': -2.0111701488494873, 'ave_value': -1.5952092546734724, 'soft_opc': nan} step=4980




2022-04-20 20:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.53 [info     ] FQE_20220420204839: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015035882053605044, 'time_algorithm_update': 0.002249351466994688, 'loss': 0.012236473628835955, 'time_step': 0.002467296209680029, 'init_value': -2.0376815795898438, 'ave_value': -1.6275312688839327, 'soft_opc': nan} step=5146




2022-04-20 20:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.53 [info     ] FQE_20220420204839: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014969096126326597, 'time_algorithm_update': 0.0020432946193649107, 'loss': 0.013148811050404581, 'time_step': 0.002263533063681729, 'init_value': -2.1674818992614746, 'ave_value': -1.7629696607153427, 'soft_opc': nan} step=5312




2022-04-20 20:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.54 [info     ] FQE_20220420204839: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015025110129850456, 'time_algorithm_update': 0.002179408647927893, 'loss': 0.013624792194122964, 'time_step': 0.002403223370931235, 'init_value': -2.18626070022583, 'ave_value': -1.773633089742145, 'soft_opc': nan} step=5478




2022-04-20 20:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.54 [info     ] FQE_20220420204839: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014984607696533203, 'time_algorithm_update': 0.002141232950141631, 'loss': 0.014015833075096584, 'time_step': 0.002360279301562941, 'init_value': -2.1915059089660645, 'ave_value': -1.7611955260975405, 'soft_opc': nan} step=5644




2022-04-20 20:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.55 [info     ] FQE_20220420204839: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015174768057214208, 'time_algorithm_update': 0.0021939478724835866, 'loss': 0.015045633162960904, 'time_step': 0.002414733530527138, 'init_value': -2.290743827819824, 'ave_value': -1.8768511426307866, 'soft_opc': nan} step=5810




2022-04-20 20:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.55 [info     ] FQE_20220420204839: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015284928930811136, 'time_algorithm_update': 0.0021972871688475093, 'loss': 0.015850995387461216, 'time_step': 0.002421008535178311, 'init_value': -2.288893938064575, 'ave_value': -1.8724936535632288, 'soft_opc': nan} step=5976




2022-04-20 20:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.56 [info     ] FQE_20220420204839: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015173044549413473, 'time_algorithm_update': 0.0022285688354308345, 'loss': 0.016421316212592983, 'time_step': 0.00245020906609225, 'init_value': -2.402628183364868, 'ave_value': -1.9825033978298978, 'soft_opc': nan} step=6142




2022-04-20 20:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.56 [info     ] FQE_20220420204839: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.000150230993707496, 'time_algorithm_update': 0.00216726509921522, 'loss': 0.01813321671987244, 'time_step': 0.002388711435249053, 'init_value': -2.341012954711914, 'ave_value': -1.9300914670431961, 'soft_opc': nan} step=6308




2022-04-20 20:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.56 [info     ] FQE_20220420204839: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015322846102427287, 'time_algorithm_update': 0.0022143757486917885, 'loss': 0.01848857602348596, 'time_step': 0.0024412281541939243, 'init_value': -2.422437906265259, 'ave_value': -1.9950300097005098, 'soft_opc': nan} step=6474




2022-04-20 20:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.57 [info     ] FQE_20220420204839: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014908055225050593, 'time_algorithm_update': 0.002177788550595203, 'loss': 0.019255270828711743, 'time_step': 0.002395282308739352, 'init_value': -2.5443058013916016, 'ave_value': -2.1116446059604117, 'soft_opc': nan} step=6640




2022-04-20 20:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.57 [info     ] FQE_20220420204839: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015061734670616058, 'time_algorithm_update': 0.0021427999059837983, 'loss': 0.019621414875791193, 'time_step': 0.0023616739066250354, 'init_value': -2.602997303009033, 'ave_value': -2.1828287869826095, 'soft_opc': nan} step=6806




2022-04-20 20:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.58 [info     ] FQE_20220420204839: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015121913817991693, 'time_algorithm_update': 0.002174304192324719, 'loss': 0.02082763621807996, 'time_step': 0.0023968305932470114, 'init_value': -2.6007769107818604, 'ave_value': -2.1669932159581555, 'soft_opc': nan} step=6972




2022-04-20 20:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.58 [info     ] FQE_20220420204839: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001499150172773614, 'time_algorithm_update': 0.0021430957748229244, 'loss': 0.021596792481519866, 'time_step': 0.002360905509397208, 'init_value': -2.7764720916748047, 'ave_value': -2.326343364361988, 'soft_opc': nan} step=7138




2022-04-20 20:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.59 [info     ] FQE_20220420204839: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014710139079266284, 'time_algorithm_update': 0.0020466525870633414, 'loss': 0.023020620506918842, 'time_step': 0.0022620824446161108, 'init_value': -2.7894463539123535, 'ave_value': -2.3287018384150993, 'soft_opc': nan} step=7304




2022-04-20 20:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:48.59 [info     ] FQE_20220420204839: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014493982475924204, 'time_algorithm_update': 0.0020513333469988353, 'loss': 0.024282131980857755, 'time_step': 0.0022638059524168453, 'init_value': -2.81841778755188, 'ave_value': -2.305419542705053, 'soft_opc': nan} step=7470




2022-04-20 20:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:49.00 [info     ] FQE_20220420204839: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014802059495305442, 'time_algorithm_update': 0.0021009933517639897, 'loss': 0.025307048788905728, 'time_step': 0.0023184612572911276, 'init_value': -2.886253833770752, 'ave_value': -2.3893526207320055, 'soft_opc': nan} step=7636




2022-04-20 20:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:49.00 [info     ] FQE_20220420204839: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015053260757262447, 'time_algorithm_update': 0.0021146535873413086, 'loss': 0.025653586401015026, 'time_step': 0.002332392945347062, 'init_value': -3.0206799507141113, 'ave_value': -2.522448749955836, 'soft_opc': nan} step=7802




2022-04-20 20:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:49.00 [info     ] FQE_20220420204839: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014956744320421335, 'time_algorithm_update': 0.00210486837180264, 'loss': 0.02738783025976652, 'time_step': 0.0023256425397941865, 'init_value': -3.038985252380371, 'ave_value': -2.5254786259767408, 'soft_opc': nan} step=7968




2022-04-20 20:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:49.01 [info     ] FQE_20220420204839: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015151357076254236, 'time_algorithm_update': 0.0021081861243190535, 'loss': 0.028883152813173784, 'time_step': 0.0023294500557773084, 'init_value': -3.128481388092041, 'ave_value': -2.619409144804855, 'soft_opc': nan} step=8134




2022-04-20 20:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-20 20:49.01 [info     ] FQE_20220420204839: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015405574476862527, 'time_algorithm_update': 0.0022764593721872352, 'loss': 0.029064707191526352, 'time_step': 0.0025040227246571735, 'init_value': -3.147719383239746, 'ave_value': -2.6527858266927855, 'soft_opc': nan} step=8300




2022-04-20 20:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204839/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-20 20:49.02 [info     ] Directory is created at d3rlpy_logs/FQE_20220420204902
2022-04-20 20:49.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 20:49.02 [debug    ] Building models...
2022-04-20 20:49.02 [debug    ] Models have been built.
2022-04-20 20:49.02 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220420204902/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-20 20:49.02 [info     ] FQE_20220420204902: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001493948836659276, 'time_algorithm_update': 0.002087083666823631, 'loss': 0.02257477968043193, 'time_step': 0.0023060350917106453, 'init_value': -1.0732526779174805, 'ave_value': -1.1195229894406087, 'soft_opc': nan} step=344




2022-04-20 20:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.03 [info     ] FQE_20220420204902: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014843012011328408, 'time_algorithm_update': 0.0020394505456436513, 'loss': 0.02068008199539902, 'time_step': 0.002254726581795271, 'init_value': -1.8647841215133667, 'ave_value': -1.9439793502291043, 'soft_opc': nan} step=688




2022-04-20 20:49.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.04 [info     ] FQE_20220420204902: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001530314600744913, 'time_algorithm_update': 0.0021412899327832597, 'loss': 0.023124908486944297, 'time_step': 0.0023671651995459267, 'init_value': -2.8634355068206787, 'ave_value': -2.975704539305455, 'soft_opc': nan} step=1032




2022-04-20 20:49.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.05 [info     ] FQE_20220420204902: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015471633090529332, 'time_algorithm_update': 0.0021254586619000103, 'loss': 0.025307102667622615, 'time_step': 0.0023516957149949183, 'init_value': -3.6924080848693848, 'ave_value': -3.8376636466598724, 'soft_opc': nan} step=1376




2022-04-20 20:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.06 [info     ] FQE_20220420204902: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015454028928002647, 'time_algorithm_update': 0.0021674681541531586, 'loss': 0.031194146432870523, 'time_step': 0.0023934376794238423, 'init_value': -4.741091728210449, 'ave_value': -4.919684407007587, 'soft_opc': nan} step=1720




2022-04-20 20:49.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.07 [info     ] FQE_20220420204902: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015759260155433832, 'time_algorithm_update': 0.002171101265175398, 'loss': 0.036596059447241036, 'time_step': 0.0024012749971345413, 'init_value': -5.435504913330078, 'ave_value': -5.6667107492953805, 'soft_opc': nan} step=2064




2022-04-20 20:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.08 [info     ] FQE_20220420204902: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015323661094488098, 'time_algorithm_update': 0.0020796871462533642, 'loss': 0.043693346851288756, 'time_step': 0.0023048866626828217, 'init_value': -6.430129528045654, 'ave_value': -6.769475655834954, 'soft_opc': nan} step=2408




2022-04-20 20:49.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.09 [info     ] FQE_20220420204902: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015616278315699376, 'time_algorithm_update': 0.0021228658598522808, 'loss': 0.05112600551246731, 'time_step': 0.0023512542247772217, 'init_value': -6.9244232177734375, 'ave_value': -7.360276978617316, 'soft_opc': nan} step=2752




2022-04-20 20:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.10 [info     ] FQE_20220420204902: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001567823942317519, 'time_algorithm_update': 0.002161231844924217, 'loss': 0.05757725402593699, 'time_step': 0.002390191305515378, 'init_value': -7.476044654846191, 'ave_value': -8.060856466626262, 'soft_opc': nan} step=3096




2022-04-20 20:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.10 [info     ] FQE_20220420204902: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015345354412877284, 'time_algorithm_update': 0.0020462413166844567, 'loss': 0.06946566103697689, 'time_step': 0.002271526774694753, 'init_value': -8.21306037902832, 'ave_value': -8.983525227238466, 'soft_opc': nan} step=3440




2022-04-20 20:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.11 [info     ] FQE_20220420204902: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015039152877275333, 'time_algorithm_update': 0.0020965684291928315, 'loss': 0.0762108413713746, 'time_step': 0.0023161651090134023, 'init_value': -8.80492115020752, 'ave_value': -9.736552421090838, 'soft_opc': nan} step=3784




2022-04-20 20:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.12 [info     ] FQE_20220420204902: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001561080300530722, 'time_algorithm_update': 0.002099988765494768, 'loss': 0.08435022301138054, 'time_step': 0.0023300079412238543, 'init_value': -9.384736061096191, 'ave_value': -10.508408347349446, 'soft_opc': nan} step=4128




2022-04-20 20:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.13 [info     ] FQE_20220420204902: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001547066278235857, 'time_algorithm_update': 0.002132081015165462, 'loss': 0.094445752905855, 'time_step': 0.002360163733016613, 'init_value': -9.863489151000977, 'ave_value': -11.231957604246096, 'soft_opc': nan} step=4472




2022-04-20 20:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.14 [info     ] FQE_20220420204902: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00014997776164564977, 'time_algorithm_update': 0.002064273800960807, 'loss': 0.10671646808014186, 'time_step': 0.002283489981362986, 'init_value': -10.579957008361816, 'ave_value': -12.229410364230473, 'soft_opc': nan} step=4816




2022-04-20 20:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.15 [info     ] FQE_20220420204902: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00014993825624155444, 'time_algorithm_update': 0.0020742749058923057, 'loss': 0.11892049822436515, 'time_step': 0.0022954822972763417, 'init_value': -10.974876403808594, 'ave_value': -12.909421341107771, 'soft_opc': nan} step=5160




2022-04-20 20:49.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.16 [info     ] FQE_20220420204902: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015773676162542298, 'time_algorithm_update': 0.0021590888500213623, 'loss': 0.1319318630537668, 'time_step': 0.0023863946282586387, 'init_value': -11.57024097442627, 'ave_value': -13.822356666288934, 'soft_opc': nan} step=5504




2022-04-20 20:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.17 [info     ] FQE_20220420204902: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015298987543860146, 'time_algorithm_update': 0.0021340652953746706, 'loss': 0.14471643208526075, 'time_step': 0.0023626151473023172, 'init_value': -11.993457794189453, 'ave_value': -14.606974450263891, 'soft_opc': nan} step=5848




2022-04-20 20:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.17 [info     ] FQE_20220420204902: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015083301899045012, 'time_algorithm_update': 0.002040805511696394, 'loss': 0.15830702906431154, 'time_step': 0.0022594055464101393, 'init_value': -12.215473175048828, 'ave_value': -15.163465760742222, 'soft_opc': nan} step=6192




2022-04-20 20:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.18 [info     ] FQE_20220420204902: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015260036601576695, 'time_algorithm_update': 0.0021279877008393753, 'loss': 0.17602453049716302, 'time_step': 0.0023518232412116473, 'init_value': -12.554805755615234, 'ave_value': -15.791842670738697, 'soft_opc': nan} step=6536




2022-04-20 20:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.19 [info     ] FQE_20220420204902: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001526662083559258, 'time_algorithm_update': 0.002095860104228175, 'loss': 0.18985718498981102, 'time_step': 0.002317553342774857, 'init_value': -12.802813529968262, 'ave_value': -16.275174160274837, 'soft_opc': nan} step=6880




2022-04-20 20:49.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.20 [info     ] FQE_20220420204902: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001515940178272336, 'time_algorithm_update': 0.0020490420419116352, 'loss': 0.2047001390533825, 'time_step': 0.0022688009018121762, 'init_value': -13.2680082321167, 'ave_value': -16.982779318241924, 'soft_opc': nan} step=7224




2022-04-20 20:49.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.21 [info     ] FQE_20220420204902: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015423464220623638, 'time_algorithm_update': 0.0020632272542909134, 'loss': 0.21696384775798855, 'time_step': 0.0022882084513819495, 'init_value': -13.653013229370117, 'ave_value': -17.69228217526599, 'soft_opc': nan} step=7568




2022-04-20 20:49.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.22 [info     ] FQE_20220420204902: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015256501907526062, 'time_algorithm_update': 0.0021009126374887865, 'loss': 0.22940464688161777, 'time_step': 0.0023226578568303308, 'init_value': -14.161108016967773, 'ave_value': -18.37360884774107, 'soft_opc': nan} step=7912




2022-04-20 20:49.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.23 [info     ] FQE_20220420204902: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015186015949692836, 'time_algorithm_update': 0.001995694498683131, 'loss': 0.24290688851371753, 'time_step': 0.0022177120973897535, 'init_value': -14.374195098876953, 'ave_value': -18.944163711328763, 'soft_opc': nan} step=8256




2022-04-20 20:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.23 [info     ] FQE_20220420204902: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015543020048806833, 'time_algorithm_update': 0.0021417036999103636, 'loss': 0.25413287857135886, 'time_step': 0.0023688895757808244, 'init_value': -14.866487503051758, 'ave_value': -19.67273990944162, 'soft_opc': nan} step=8600




2022-04-20 20:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.24 [info     ] FQE_20220420204902: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015350968338722406, 'time_algorithm_update': 0.002099502918332122, 'loss': 0.26933566329264363, 'time_step': 0.002325006002603575, 'init_value': -15.438478469848633, 'ave_value': -20.40598243260169, 'soft_opc': nan} step=8944




2022-04-20 20:49.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.25 [info     ] FQE_20220420204902: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001514789670012718, 'time_algorithm_update': 0.0020407916501510976, 'loss': 0.27706158323061847, 'time_step': 0.0022634849991909293, 'init_value': -15.596551895141602, 'ave_value': -20.67234010852135, 'soft_opc': nan} step=9288




2022-04-20 20:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.26 [info     ] FQE_20220420204902: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015325185864470726, 'time_algorithm_update': 0.002061543769614641, 'loss': 0.2811656575054364, 'time_step': 0.0022854895092720207, 'init_value': -15.493974685668945, 'ave_value': -20.840669110444214, 'soft_opc': nan} step=9632




2022-04-20 20:49.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.27 [info     ] FQE_20220420204902: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015120658763619356, 'time_algorithm_update': 0.0020581601664077403, 'loss': 0.28086554279637543, 'time_step': 0.0022793065669924712, 'init_value': -15.58743953704834, 'ave_value': -21.14169513054259, 'soft_opc': nan} step=9976




2022-04-20 20:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.28 [info     ] FQE_20220420204902: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001531472039777179, 'time_algorithm_update': 0.0020574941191562387, 'loss': 0.28476114444932793, 'time_step': 0.002279886672663134, 'init_value': -15.589889526367188, 'ave_value': -21.289651417087864, 'soft_opc': nan} step=10320




2022-04-20 20:49.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.29 [info     ] FQE_20220420204902: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015390543050544206, 'time_algorithm_update': 0.002073253310003946, 'loss': 0.29460940446682965, 'time_step': 0.0022985678772593655, 'init_value': -15.49102783203125, 'ave_value': -21.350015528862542, 'soft_opc': nan} step=10664




2022-04-20 20:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.29 [info     ] FQE_20220420204902: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001550469287606173, 'time_algorithm_update': 0.0020828621332035506, 'loss': 0.30362798134406466, 'time_step': 0.0023094443387763446, 'init_value': -15.375609397888184, 'ave_value': -21.456870281588923, 'soft_opc': nan} step=11008




2022-04-20 20:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.30 [info     ] FQE_20220420204902: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015499979950660882, 'time_algorithm_update': 0.002065367476884709, 'loss': 0.30873638841453505, 'time_step': 0.002290487289428711, 'init_value': -16.158784866333008, 'ave_value': -22.327515873608288, 'soft_opc': nan} step=11352




2022-04-20 20:49.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.31 [info     ] FQE_20220420204902: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015376265658888707, 'time_algorithm_update': 0.002088548832161482, 'loss': 0.310628282913375, 'time_step': 0.0023144753866417463, 'init_value': -15.990154266357422, 'ave_value': -22.359757292915035, 'soft_opc': nan} step=11696




2022-04-20 20:49.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.32 [info     ] FQE_20220420204902: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001513618369435155, 'time_algorithm_update': 0.0020407916501510976, 'loss': 0.3167481874927965, 'time_step': 0.0022626034049100653, 'init_value': -16.08827018737793, 'ave_value': -22.625373066626153, 'soft_opc': nan} step=12040




2022-04-20 20:49.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.33 [info     ] FQE_20220420204902: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015203689419946006, 'time_algorithm_update': 0.002067455718683642, 'loss': 0.3357880040732494, 'time_step': 0.0022921569125596867, 'init_value': -16.57851219177246, 'ave_value': -23.236384602709933, 'soft_opc': nan} step=12384




2022-04-20 20:49.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.34 [info     ] FQE_20220420204902: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001540329567221708, 'time_algorithm_update': 0.0020698766375696937, 'loss': 0.3446505363424157, 'time_step': 0.0023003012634987053, 'init_value': -16.93691062927246, 'ave_value': -23.65345510840416, 'soft_opc': nan} step=12728




2022-04-20 20:49.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.35 [info     ] FQE_20220420204902: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001542512760605923, 'time_algorithm_update': 0.002118027487466502, 'loss': 0.35847422674969703, 'time_step': 0.0023442998875019164, 'init_value': -17.166183471679688, 'ave_value': -23.975349771761678, 'soft_opc': nan} step=13072




2022-04-20 20:49.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.35 [info     ] FQE_20220420204902: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015379869660665823, 'time_algorithm_update': 0.0021059027937955633, 'loss': 0.37317548505961895, 'time_step': 0.00233059567074443, 'init_value': -17.625457763671875, 'ave_value': -24.650467400239396, 'soft_opc': nan} step=13416




2022-04-20 20:49.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.36 [info     ] FQE_20220420204902: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015388047972390818, 'time_algorithm_update': 0.002054872900940651, 'loss': 0.3908526841659359, 'time_step': 0.0022805735122325807, 'init_value': -18.190929412841797, 'ave_value': -25.02789102126886, 'soft_opc': nan} step=13760




2022-04-20 20:49.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.37 [info     ] FQE_20220420204902: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015541425971097724, 'time_algorithm_update': 0.0020437642585399538, 'loss': 0.40858973561093037, 'time_step': 0.002271043006763902, 'init_value': -18.659412384033203, 'ave_value': -25.54911517278031, 'soft_opc': nan} step=14104




2022-04-20 20:49.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.38 [info     ] FQE_20220420204902: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015072143355081247, 'time_algorithm_update': 0.0020453728908716245, 'loss': 0.41770971041757526, 'time_step': 0.0022665726584057476, 'init_value': -19.226900100708008, 'ave_value': -26.069567726485364, 'soft_opc': nan} step=14448




2022-04-20 20:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.39 [info     ] FQE_20220420204902: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015242293823597043, 'time_algorithm_update': 0.002053441696388777, 'loss': 0.4456476764591021, 'time_step': 0.0022799837034802105, 'init_value': -19.71973991394043, 'ave_value': -26.500217588903666, 'soft_opc': nan} step=14792




2022-04-20 20:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.40 [info     ] FQE_20220420204902: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001521096673122672, 'time_algorithm_update': 0.0020167682059975558, 'loss': 0.4590331095400764, 'time_step': 0.0022401747315428976, 'init_value': -20.219324111938477, 'ave_value': -26.81991836133304, 'soft_opc': nan} step=15136




2022-04-20 20:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.41 [info     ] FQE_20220420204902: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015208055806714436, 'time_algorithm_update': 0.0020560331122819768, 'loss': 0.4717070562494269, 'time_step': 0.0022789634937463803, 'init_value': -20.413501739501953, 'ave_value': -27.11050151022705, 'soft_opc': nan} step=15480




2022-04-20 20:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.41 [info     ] FQE_20220420204902: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015086836593095645, 'time_algorithm_update': 0.002024525819822799, 'loss': 0.4830290186435504, 'time_step': 0.0022433996200561523, 'init_value': -20.69222640991211, 'ave_value': -27.367384572340562, 'soft_opc': nan} step=15824




2022-04-20 20:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.42 [info     ] FQE_20220420204902: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015327958173530046, 'time_algorithm_update': 0.0020862006863882373, 'loss': 0.5106182821300748, 'time_step': 0.002311077228812284, 'init_value': -20.77869987487793, 'ave_value': -27.486655817966202, 'soft_opc': nan} step=16168




2022-04-20 20:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.43 [info     ] FQE_20220420204902: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015602624693582224, 'time_algorithm_update': 0.0020958379257557005, 'loss': 0.49977453694635526, 'time_step': 0.0023257323475771173, 'init_value': -20.840007781982422, 'ave_value': -27.557805025094265, 'soft_opc': nan} step=16512




2022-04-20 20:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.44 [info     ] FQE_20220420204902: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001530917577965315, 'time_algorithm_update': 0.0020502570063568827, 'loss': 0.511474343514893, 'time_step': 0.002274059972097707, 'init_value': -20.974571228027344, 'ave_value': -27.78835693087664, 'soft_opc': nan} step=16856




2022-04-20 20:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-20 20:49.45 [info     ] FQE_20220420204902: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015653427257094273, 'time_algorithm_update': 0.0020980966645617817, 'loss': 0.5384062088885193, 'time_step': 0.0023267393888429153, 'init_value': -21.142227172851562, 'ave_value': -27.81341360630216, 'soft_opc': nan} step=17200




2022-04-20 20:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220420204902/model_17200.pt


## Reading hyperparameters from file

In [19]:
# Load the object stored in "hyperparams_cql.pkl" (opened in binary mode) into `data`.
# NOTE(review): `pkl` is presumably the standard `pickle` module imported under an
# alias in an earlier cell -- confirm that import exists so this cell also runs
# on a fresh kernel. If it is `pickle`, remember that unpickling can execute
# arbitrary code, so only load files that were produced by a trusted source.
with open("hyperparams_cql.pkl", "rb") as f:
    data = pkl.load(f)

# Print the loaded object as-is.
# NOTE(review): the meaning and order of the stored entries are not documented
# in this cell -- confirm against the code that wrote the file.
print(data)

[0.0008288831520532805, 0.003934260713329634, 4.6195344911552404e-05, 7]
