# Hyper parameter search for CQL

In [23]:
!pip install d3rlpy



In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# plt.style.use('matplotlibrc')

# from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. As `d3rlpy` wants it in the form (observations, actions, rewards, terminal flags), we go ahead and do that. Here's a helper function to get a dataset from a list of chunks of your choosing.

In [25]:
import numpy as np
import torch
import random
import pandas
from Python.data_sampler import *

In [26]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt") -> d3rlpy.dataset.MDPDataset :
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")
    states = []
    actions = []
    rewards = []
    next_states = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        [statesChunk, actionsChunk, rewardsChunk, nextStatesChunk] = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)
        next_states.append(nextStatesChunk)
    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)
    next_states = torch.cat(next_states)
    terminals = np.zeros(len(states))
    terminals[::1111] = 1 #episode length 100, change if necessary
    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(), 
                                        actions.numpy(), 
                                        rewards.numpy(), terminals)
    return dataset, states.numpy(), actions.numpy(), rewards.numpy()

We can build the dataset from there, just like this, and split into train and test sets.

In [27]:
dataset, states, actions, rewards = get_dataset([i for  i in range(50)], path="collected_data/rl_stochpid.txt")

start
[ 0.00000000e+00  7.95731469e+08 -4.75891077e-02 -3.69999953e-02
  2.00999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.50429671e-01 -4.92727243e-01 -5.31666025e-03]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25610892e-01 -3.35999953e-02
 -2.42000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.08749986e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.90489108e-01 -5.87999953e-02
 -1.01000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.76979602e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.48010892e-01  4.64000047e-02
 -1.04000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.83151637e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.08389108e-01  3.32000047e-02
 -2.02000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.25137655

[ 0.00000000e+00  7.95731469e+08  3.84410892e-01 -2.65999953e-02
  1.95999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 40 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.37889108e-01  4.72000047e-02
 -6.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.04730803e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 41 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.74610892e-01  1.40000047e-02
  7.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.58596924e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 42 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.29289108e-01 -1.49999953e-02
  1.51999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 43 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.03891077e-02 -1.45999953e-02
  2.71999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.45547944e-

In [28]:
print("The behavior policy value statistics are:")
dataset.compute_stats()['return']

The behavior policy value statistics are:


{'mean': -138.04016,
 'std': 107.28185,
 'min': -400.4336,
 'max': 0.0,
 'histogram': (array([ 2,  1,  2,  1,  0,  0,  0,  2,  4,  1,  0,  1,  5,  3,  3,  3,  5,
         15,  1,  1]),
  array([-400.4336  , -380.41193 , -360.39023 , -340.36856 , -320.34686 ,
         -300.3252  , -280.30353 , -260.28183 , -240.26016 , -220.23848 ,
         -200.2168  , -180.19511 , -160.17343 , -140.15176 , -120.13008 ,
         -100.1084  ,  -80.086716,  -60.06504 ,  -40.043358,  -20.021679,
            0.      ], dtype=float32))}

In [29]:
from sklearn.model_selection import train_test_split
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

## Setting up an Algorithm

In [30]:
from d3rlpy.algos import CQL
from d3rlpy.models.encoders import VectorEncoderFactory

from d3rlpy.preprocessing import MinMaxActionScaler
import random
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer
import pickle as pkl
from statistics import harmonic_mean as hm

## Perform a random search on hyper params. 

As good fqe on both stochastic and deterministic data is needed for good performance during the evaluation step in the simulator, we take the harmonic mean of both fqes to understand the performance on each dataset. The harmonic mean is large only when each number whose mean is being taken is large. Therefore, a large harmonic mean corresponds to large numbers whose mean is being taken.


In [33]:

# The number of times we would wanna do a random search for hyper-params. In 
# every random search, we randomly pick a new set of hyper-params.
num_search_iterations = 20
largest_fqe = -np.inf

for i in range(num_search_iterations):

    random.seed()

    actor_lr_this_iter = random.uniform(1e-5, 1e-2)
    critic_lr_this_iter = random.uniform(1e-5, 1e-2)
    temp_lr_this_iter = random.uniform(1e-5, 1e-4)
    n_steps_this_iter = random.choice([1, 3, 5, 7])

    actor_encoder = VectorEncoderFactory(hidden_units=[12, 24, 36, 24, 12],
                                          activation='relu', use_batch_norm=True, dropout_rate=0.2)
    critic_encoder = VectorEncoderFactory(hidden_units=[12, 24, 24, 12],
                                          activation='relu', use_batch_norm=True, dropout_rate=0.2)

    print("search iteration: ", i)
    print("using hyper params: ", [actor_lr_this_iter, critic_lr_this_iter, 
                                   temp_lr_this_iter, n_steps_this_iter])

    model = CQL(q_func_factory='qr', #qr -> quantile regression q function, but you don't have to use this
                reward_scaler='standard',
                actor_encoder_factory = actor_encoder,
                critic_encoder_factory = critic_encoder,
                action_scaler=action_scaler,
                actor_learning_rate=actor_lr_this_iter, 
                critic_learning_rate=critic_lr_this_iter,
                temp_learning_rate=temp_lr_this_iter,
                n_steps=n_steps_this_iter, 
                use_gpu=True) #change it to true if you have one
    model.build_with_dataset(dataset)

    model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=50, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })
    
    ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset([i+100 for i  in range(10)], 
                                                                    path="collected_data/rl_stochpid.txt") #change if you'd prefer different chunks
    ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

    fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
    history_stoch = fqe.fit(ope_train_episodes,
        eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })
    
    ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset([i+200 for i in range(10)], 
                                                                    path="collected_data/rl_stochpid.txt") #change if you'd prefer different chunks
    ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

    fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
    history_det = fqe.fit(ope_train_episodes,
        eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })
    
    if(hm([history_stoch[-1][1]["ave_value"] + 50, 
           history_det[-1][1]["ave_value"] + 50]) \
                                                          > largest_fqe):
        largest_fqe = hm([history_stoch[-1][1]["ave_value"] + 50, 
                         history_det[-1][1]["ave_value"] + 50])

        # Save the hyper-params
        hyperparams = [actor_lr_this_iter, critic_lr_this_iter, 
                       temp_lr_this_iter, n_steps_this_iter]

        with open("hyperparams_cql.pkl", "wb") as f:
            print("most optimal hyper params for cql at this point: ", hyperparams)
            pkl.dump(hyperparams, f)

        # Save model and policy
        model.save_model("model_hyperparams_cql.pt")
        model.save_policy("policy_hyperparams_cql.pt")


search iteration:  0
using hyper params:  [0.00469557651656814, 0.003304999882985713, 4.52538497359818e-05, 7]
2022-04-17 14:51.01 [debug    ] RoundIterator is selected.
2022-04-17 14:51.01 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145101
2022-04-17 14:51.01 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:51.01 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:51.01 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145101/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.00469557651656814, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate':

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.07 [info     ] CQL_20220417145101: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00036236238197462096, 'time_algorithm_update': 0.029904378236398188, 'temp_loss': 4.738555341077274, 'temp': 0.9959946380564447, 'alpha_loss': -22.22049955785627, 'alpha': 1.0091362232287255, 'critic_loss': 157.25083864652194, 'actor_loss': -0.46273912919255406, 'time_step': 0.030343302608241696, 'td_error': 9.133720136999088, 'init_value': -4.455194473266602, 'ave_value': -3.4397618988397958} step=169
2022-04-17 14:51.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.12 [info     ] CQL_20220417145101: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00035676025074614576, 'time_algorithm_update': 0.031510051185562764, 'temp_loss': 4.935345353459465, 'temp': 0.9882349967956543, 'alpha_loss': -19.686638499152732, 'alpha': 1.027395603219433, 'critic_loss': 90.78072853765543, 'actor_loss': 4.483377108207116, 'time_step': 0.03194766213907998, 'td_error': 1.2886037594896624, 'init_value': -9.449234008789062, 'ave_value': -8.013484366197844} step=338
2022-04-17 14:51.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.18 [info     ] CQL_20220417145101: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.000362887185000809, 'time_algorithm_update': 0.03137576509509566, 'temp_loss': 4.94261001835208, 'temp': 0.9805445096196508, 'alpha_loss': -18.7920160801453, 'alpha': 1.0429914766514794, 'critic_loss': 120.63882951905741, 'actor_loss': 6.253955618164243, 'time_step': 0.031813686415993955, 'td_error': 1.4116607989898386, 'init_value': -12.475167274475098, 'ave_value': -10.727855943946151} step=507
2022-04-17 14:51.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.23 [info     ] CQL_20220417145101: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00036676818802511903, 'time_algorithm_update': 0.031066252635075495, 'temp_loss': 4.907614321398312, 'temp': 0.9730124046816628, 'alpha_loss': -18.79099031030779, 'alpha': 1.0594291256729669, 'critic_loss': 162.42569710093844, 'actor_loss': 8.433008402762328, 'time_step': 0.031513559747729784, 'td_error': 1.5238332278818947, 'init_value': -14.831500053405762, 'ave_value': -12.901983915878846} step=676
2022-04-17 14:51.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.29 [info     ] CQL_20220417145101: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003647620861346905, 'time_algorithm_update': 0.03092675942641038, 'temp_loss': 4.87075095204912, 'temp': 0.9656022670706348, 'alpha_loss': -18.884033451418905, 'alpha': 1.0765363210757104, 'critic_loss': 215.34031171629414, 'actor_loss': 10.210721207793648, 'time_step': 0.031370628514938804, 'td_error': 1.5726488466635373, 'init_value': -17.466718673706055, 'ave_value': -15.170248460640778} step=845
2022-04-17 14:51.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.34 [info     ] CQL_20220417145101: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00037573639457747784, 'time_algorithm_update': 0.030733895725047096, 'temp_loss': 4.833600760916986, 'temp': 0.9582957240956775, 'alpha_loss': -19.18628339372443, 'alpha': 1.0943426126559106, 'critic_loss': 279.00260699571237, 'actor_loss': 11.802012488686827, 'time_step': 0.031187033512183195, 'td_error': 1.6331925456397938, 'init_value': -18.864086151123047, 'ave_value': -16.530760704719267} step=1014
2022-04-17 14:51.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145101/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:51.35 [info     ] FQE_20220417145134: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00014304805111575437, 'time_algorithm_update': 0.003920598463578658, 'loss': 0.01270570397352824, 'time_step': 0.004132973683344854, 'init_value': -0.19792605936527252, 'ave_value': -0.11633461518380958, 'soft_opc': nan} step=77




2022-04-17 14:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.35 [info     ] FQE_20220417145134: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00014895897407036322, 'time_algorithm_update': 0.0038644369546469155, 'loss': 0.008019341917877848, 'time_step': 0.004096477062671216, 'init_value': -0.3074937164783478, 'ave_value': -0.18441981329658144, 'soft_opc': nan} step=154




2022-04-17 14:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.36 [info     ] FQE_20220417145134: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015629731215439834, 'time_algorithm_update': 0.0043049508875066585, 'loss': 0.005813484055945626, 'time_step': 0.0045356998195895904, 'init_value': -0.38510945439338684, 'ave_value': -0.22688511380556603, 'soft_opc': nan} step=231




2022-04-17 14:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.36 [info     ] FQE_20220417145134: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00015866291987431513, 'time_algorithm_update': 0.003877871996396548, 'loss': 0.00452786415525071, 'time_step': 0.004120136236215567, 'init_value': -0.4403287172317505, 'ave_value': -0.27359292173028193, 'soft_opc': nan} step=308




2022-04-17 14:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.36 [info     ] FQE_20220417145134: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001650816434389585, 'time_algorithm_update': 0.004040324842775023, 'loss': 0.0043005626467237995, 'time_step': 0.004287781653466162, 'init_value': -0.42748552560806274, 'ave_value': -0.23433114986086415, 'soft_opc': nan} step=385




2022-04-17 14:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.37 [info     ] FQE_20220417145134: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001739031308657163, 'time_algorithm_update': 0.004366936621727881, 'loss': 0.004235619999613468, 'time_step': 0.004642090240082183, 'init_value': -0.4827418923377991, 'ave_value': -0.25352796345177264, 'soft_opc': nan} step=462




2022-04-17 14:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.37 [info     ] FQE_20220417145134: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016769186242834315, 'time_algorithm_update': 0.004121387159669554, 'loss': 0.004013857015033627, 'time_step': 0.004387737868668197, 'init_value': -0.5353525280952454, 'ave_value': -0.2858829505963939, 'soft_opc': nan} step=539




2022-04-17 14:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.37 [info     ] FQE_20220417145134: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016207818861131544, 'time_algorithm_update': 0.003900048020598176, 'loss': 0.0038472556527220196, 'time_step': 0.004145498399610643, 'init_value': -0.5944085717201233, 'ave_value': -0.3081331640700096, 'soft_opc': nan} step=616




2022-04-17 14:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.38 [info     ] FQE_20220417145134: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016051453429383116, 'time_algorithm_update': 0.004280483567869508, 'loss': 0.0036495700599504756, 'time_step': 0.004518159024127118, 'init_value': -0.6220677495002747, 'ave_value': -0.325840507003034, 'soft_opc': nan} step=693




2022-04-17 14:51.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.38 [info     ] FQE_20220417145134: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016620871308562044, 'time_algorithm_update': 0.004385929603081245, 'loss': 0.0037957829447438965, 'time_step': 0.004642034505868887, 'init_value': -0.6792863011360168, 'ave_value': -0.3514772096749496, 'soft_opc': nan} step=770




2022-04-17 14:51.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145134/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:51.39 [info     ] FQE_20220417145138: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001652240753173828, 'time_algorithm_update': 0.004225285022289722, 'loss': 0.009648441113289687, 'time_step': 0.004464443627890054, 'init_value': 0.3076206147670746, 'ave_value': 0.3411765237112303, 'soft_opc': nan} step=77




2022-04-17 14:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.39 [info     ] FQE_20220417145138: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001513896050391259, 'time_algorithm_update': 0.004075505516745828, 'loss': 0.006384653486795239, 'time_step': 0.004314041756964349, 'init_value': 0.24985399842262268, 'ave_value': 0.3148299134596511, 'soft_opc': nan} step=154




2022-04-17 14:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.40 [info     ] FQE_20220417145138: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016139389632584213, 'time_algorithm_update': 0.00418664263440417, 'loss': 0.004556015291094006, 'time_step': 0.004440558421147334, 'init_value': 0.21330273151397705, 'ave_value': 0.2999563114868628, 'soft_opc': nan} step=231




2022-04-17 14:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.40 [info     ] FQE_20220417145138: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001747422404103465, 'time_algorithm_update': 0.004314382354934494, 'loss': 0.003481807943143241, 'time_step': 0.004588096172778638, 'init_value': 0.19751763343811035, 'ave_value': 0.2942350057784367, 'soft_opc': nan} step=308




2022-04-17 14:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.40 [info     ] FQE_20220417145138: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015114809011484122, 'time_algorithm_update': 0.0037414346422467914, 'loss': 0.0031022825716789087, 'time_step': 0.003960432944359717, 'init_value': 0.1904584914445877, 'ave_value': 0.2905507271958364, 'soft_opc': nan} step=385




2022-04-17 14:51.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.41 [info     ] FQE_20220417145138: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.000169088314105938, 'time_algorithm_update': 0.004175192349916929, 'loss': 0.002870390924939571, 'time_step': 0.004414979513589438, 'init_value': 0.15575623512268066, 'ave_value': 0.27654035717987263, 'soft_opc': nan} step=462




2022-04-17 14:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.41 [info     ] FQE_20220417145138: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001690697360348392, 'time_algorithm_update': 0.004075973064868481, 'loss': 0.0024057543948413683, 'time_step': 0.004313379139095158, 'init_value': 0.13181807100772858, 'ave_value': 0.26613618985389, 'soft_opc': nan} step=539




2022-04-17 14:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.41 [info     ] FQE_20220417145138: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015232779762961647, 'time_algorithm_update': 0.003970353634326489, 'loss': 0.002083008324231517, 'time_step': 0.004186772680901862, 'init_value': 0.09357085824012756, 'ave_value': 0.2409653917777243, 'soft_opc': nan} step=616




2022-04-17 14:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.42 [info     ] FQE_20220417145138: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015439615621195212, 'time_algorithm_update': 0.0038401492230303877, 'loss': 0.001698786892486712, 'time_step': 0.004066900773481889, 'init_value': 0.08607178181409836, 'ave_value': 0.23016622206447898, 'soft_opc': nan} step=693




2022-04-17 14:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:51.42 [info     ] FQE_20220417145138: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00015303066798618862, 'time_algorithm_update': 0.0038145796045080406, 'loss': 0.00179588068246406, 'time_step': 0.004049016283704089, 'init_value': 0.0467398539185524, 'ave_value': 0.20278057161685045, 'soft_opc': nan} step=770




2022-04-17 14:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145138/model_770.pt
most optimal hyper params at this point:  [0.00469557651656814, 0.003304999882985713, 4.52538497359818e-05, 7]
search iteration:  1
using hyper params:  [0.0007435952971810115, 0.0011294859942086025, 7.935935706758037e-05, 3]
2022-04-17 14:51.42 [debug    ] RoundIterator is selected.
2022-04-17 14:51.42 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145142
2022-04-17 14:51.42 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:51.42 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:51.42 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145142/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.48 [info     ] CQL_20220417145142: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00031816888843062363, 'time_algorithm_update': 0.031005646350115715, 'temp_loss': 4.055915214606291, 'temp': 0.9927926652530241, 'alpha_loss': -18.026222488583898, 'alpha': 1.0087165550367367, 'critic_loss': 76.19942022921771, 'actor_loss': -0.838252396471578, 'time_step': 0.03139782516208626, 'td_error': 0.543844728927669, 'init_value': -0.2870383560657501, 'ave_value': -0.11356559558377206} step=169
2022-04-17 14:51.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.53 [info     ] CQL_20220417145142: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003198223001152806, 'time_algorithm_update': 0.030656202304998093, 'temp_loss': 4.642327108326748, 'temp': 0.978310745143326, 'alpha_loss': -18.857850565712834, 'alpha': 1.0266914635720337, 'critic_loss': 55.41735765355578, 'actor_loss': -0.19792057954590525, 'time_step': 0.031050230624407708, 'td_error': 0.907962745650995, 'init_value': -2.631558418273926, 'ave_value': -2.1514638706988043} step=338
2022-04-17 14:51.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:51.58 [info     ] CQL_20220417145142: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00031599349524142473, 'time_algorithm_update': 0.02979138193751228, 'temp_loss': 4.691266522605038, 'temp': 0.964437220928937, 'alpha_loss': -18.39790037256726, 'alpha': 1.0442493554403092, 'critic_loss': 56.17743588058201, 'actor_loss': 0.9930963798387517, 'time_step': 0.030184906615308052, 'td_error': 1.0816296848379476, 'init_value': -4.212973594665527, 'ave_value': -3.550053017308583} step=507
2022-04-17 14:51.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.04 [info     ] CQL_20220417145142: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003221006788445648, 'time_algorithm_update': 0.030292855211968957, 'temp_loss': 4.678153052132511, 'temp': 0.9511059797727145, 'alpha_loss': -18.864591846804647, 'alpha': 1.062136106237152, 'critic_loss': 64.11876992220004, 'actor_loss': 1.7174454098622474, 'time_step': 0.030696959185177054, 'td_error': 0.9590873543186518, 'init_value': -5.543028354644775, 'ave_value': -4.721753980103914} step=676
2022-04-17 14:52.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.09 [info     ] CQL_20220417145142: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00032131488506610575, 'time_algorithm_update': 0.030353042500964284, 'temp_loss': 4.631262626873671, 'temp': 0.938175028950505, 'alpha_loss': -19.151573892175797, 'alpha': 1.080641127902375, 'critic_loss': 78.2650700856948, 'actor_loss': 2.796063746220967, 'time_step': 0.030751842013477575, 'td_error': 0.9567004429314827, 'init_value': -7.172296047210693, 'ave_value': -6.129510958044379} step=845
2022-04-17 14:52.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.13 [info     ] CQL_20220417145142: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0002719467208230284, 'time_algorithm_update': 0.02421863826774281, 'temp_loss': 4.597981402154505, 'temp': 0.9255968639131128, 'alpha_loss': -19.5926496855606, 'alpha': 1.0996556211505415, 'critic_loss': 96.33972515580217, 'actor_loss': 3.3929126149803928, 'time_step': 0.024554781659820377, 'td_error': 0.9371785645928713, 'init_value': -8.062685012817383, 'ave_value': -6.952760174977887} step=1014
2022-04-17 14:52.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145142/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-0

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:52.14 [info     ] FQE_20220417145213: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00010568445379083806, 'time_algorithm_update': 0.002558596722491376, 'loss': 0.012711980785303688, 'time_step': 0.002722563681664405, 'init_value': -0.3247610628604889, 'ave_value': -0.30425376314837654, 'soft_opc': nan} step=77




2022-04-17 14:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.14 [info     ] FQE_20220417145213: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 9.013770462630631e-05, 'time_algorithm_update': 0.00246318903836337, 'loss': 0.008454004121194411, 'time_step': 0.0025959634161614752, 'init_value': -0.4433573782444, 'ave_value': -0.39201844905142313, 'soft_opc': nan} step=154




2022-04-17 14:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.14 [info     ] FQE_20220417145213: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 8.868551873541497e-05, 'time_algorithm_update': 0.0024045752240465833, 'loss': 0.006006480536314768, 'time_step': 0.002530546931477336, 'init_value': -0.499653160572052, 'ave_value': -0.4119369029797412, 'soft_opc': nan} step=231




2022-04-17 14:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.14 [info     ] FQE_20220417145213: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 9.656881357168222e-05, 'time_algorithm_update': 0.0027228144856242392, 'loss': 0.004692114378603829, 'time_step': 0.0028676336461847477, 'init_value': -0.5889928340911865, 'ave_value': -0.49801568011040087, 'soft_opc': nan} step=308




2022-04-17 14:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.15 [info     ] FQE_20220417145213: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00010969731714818384, 'time_algorithm_update': 0.00261704023782309, 'loss': 0.004238894581189984, 'time_step': 0.0027833666120256695, 'init_value': -0.5802974700927734, 'ave_value': -0.4542918731433314, 'soft_opc': nan} step=385




2022-04-17 14:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.15 [info     ] FQE_20220417145213: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001167724658916523, 'time_algorithm_update': 0.0028570658200747006, 'loss': 0.004115418307241294, 'time_step': 0.003032099116932262, 'init_value': -0.6553144454956055, 'ave_value': -0.4970115476900393, 'soft_opc': nan} step=462




2022-04-17 14:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.15 [info     ] FQE_20220417145213: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00011480319035517705, 'time_algorithm_update': 0.002776870479831448, 'loss': 0.0038252543830373264, 'time_step': 0.0029439337841876143, 'init_value': -0.7282736301422119, 'ave_value': -0.5442853490444454, 'soft_opc': nan} step=539




2022-04-17 14:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.15 [info     ] FQE_20220417145213: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00010064979652305702, 'time_algorithm_update': 0.0025275032241623124, 'loss': 0.003638087741595197, 'time_step': 0.002676068962394417, 'init_value': -0.8023484349250793, 'ave_value': -0.5963359497688913, 'soft_opc': nan} step=616




2022-04-17 14:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.16 [info     ] FQE_20220417145213: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00011065718415495637, 'time_algorithm_update': 0.002766875477580281, 'loss': 0.0033499770631481495, 'time_step': 0.002927027739487685, 'init_value': -0.8021412491798401, 'ave_value': -0.5842865750059352, 'soft_opc': nan} step=693




2022-04-17 14:52.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.16 [info     ] FQE_20220417145213: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00011400433329792766, 'time_algorithm_update': 0.002760908820412376, 'loss': 0.0034057687145548983, 'time_step': 0.002923327606993836, 'init_value': -0.8773481845855713, 'ave_value': -0.6313647500298045, 'soft_opc': nan} step=770




2022-04-17 14:52.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145213/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:52.16 [info     ] FQE_20220417145216: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.00011317025531422009, 'time_algorithm_update': 0.002865146506916393, 'loss': 0.009860517816956748, 'time_step': 0.003038433465090665, 'init_value': 0.3769869804382324, 'ave_value': 0.4151074635284441, 'soft_opc': nan} step=88




2022-04-17 14:52.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.17 [info     ] FQE_20220417145216: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.00015848604115572843, 'time_algorithm_update': 0.0038221234625036068, 'loss': 0.006848436708837239, 'time_step': 0.00405601208860224, 'init_value': 0.318762868642807, 'ave_value': 0.387999927621704, 'soft_opc': nan} step=176




2022-04-17 14:52.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.17 [info     ] FQE_20220417145216: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.00017074563286521217, 'time_algorithm_update': 0.0042321167208931665, 'loss': 0.004912831336365674, 'time_step': 0.00446977669542486, 'init_value': 0.2943795621395111, 'ave_value': 0.3782371405545656, 'soft_opc': nan} step=264




2022-04-17 14:52.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.18 [info     ] FQE_20220417145216: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00016287511045282537, 'time_algorithm_update': 0.004152547229420055, 'loss': 0.00410899820748124, 'time_step': 0.004391239448027177, 'init_value': 0.27377963066101074, 'ave_value': 0.36261219852142507, 'soft_opc': nan} step=352




2022-04-17 14:52.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.18 [info     ] FQE_20220417145216: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.00015658411112698641, 'time_algorithm_update': 0.0038934079083529386, 'loss': 0.0036145379893820395, 'time_step': 0.004129504615610296, 'init_value': 0.27900880575180054, 'ave_value': 0.36258777189362157, 'soft_opc': nan} step=440




2022-04-17 14:52.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.18 [info     ] FQE_20220417145216: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.0001695264469493519, 'time_algorithm_update': 0.004409429701891812, 'loss': 0.0033288730879906902, 'time_step': 0.004672248255122791, 'init_value': 0.2763334810733795, 'ave_value': 0.3555312220577721, 'soft_opc': nan} step=528




2022-04-17 14:52.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.19 [info     ] FQE_20220417145216: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.0001604042269966819, 'time_algorithm_update': 0.004034795544364236, 'loss': 0.0028996636260258542, 'time_step': 0.004266394810243087, 'init_value': 0.23249033093452454, 'ave_value': 0.31356186724460877, 'soft_opc': nan} step=616




2022-04-17 14:52.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.19 [info     ] FQE_20220417145216: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00016795234246687457, 'time_algorithm_update': 0.004075410691174594, 'loss': 0.0027310297591611743, 'time_step': 0.004323921420357444, 'init_value': 0.21357320249080658, 'ave_value': 0.2892060421071611, 'soft_opc': nan} step=704




2022-04-17 14:52.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.20 [info     ] FQE_20220417145216: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.00016919049349698153, 'time_algorithm_update': 0.004395086656917225, 'loss': 0.0025040969842600384, 'time_step': 0.004624057899821888, 'init_value': 0.2439402937889099, 'ave_value': 0.309971133881324, 'soft_opc': nan} step=792




2022-04-17 14:52.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:52.20 [info     ] FQE_20220417145216: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.00015152855352921918, 'time_algorithm_update': 0.003889568827368996, 'loss': 0.002373378771484237, 'time_step': 0.00411030650138855, 'init_value': 0.21716023981571198, 'ave_value': 0.2809392527484142, 'soft_opc': nan} step=880




2022-04-17 14:52.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145216/model_880.pt
search iteration:  2
using hyper params:  [0.005394939104790958, 0.005962372476408486, 3.6865716783390766e-05, 5]
2022-04-17 14:52.20 [debug    ] RoundIterator is selected.
2022-04-17 14:52.20 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145220
2022-04-17 14:52.20 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:52.20 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:52.20 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145220/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.005394939104790958, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.25 [info     ] CQL_20220417145220: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00033695316879001595, 'time_algorithm_update': 0.030033053731071877, 'temp_loss': 4.8440834671787965, 'temp': 0.9967589967349577, 'alpha_loss': -20.212879937076004, 'alpha': 1.0089508929901574, 'critic_loss': 86.0944727384127, 'actor_loss': 0.6593671667681643, 'time_step': 0.030444966265435754, 'td_error': 1.138123812001164, 'init_value': -5.8615217208862305, 'ave_value': -5.005746449683164} step=169
2022-04-17 14:52.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.31 [info     ] CQL_20220417145220: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00034094985420181904, 'time_algorithm_update': 0.030574976339848084, 'temp_loss': 4.992268579246025, 'temp': 0.9904014506988977, 'alpha_loss': -18.235702988664073, 'alpha': 1.0254166718770767, 'critic_loss': 73.93472768569134, 'actor_loss': 2.7580999601522143, 'time_step': 0.030995854259242672, 'td_error': 1.417544501438342, 'init_value': -7.3216705322265625, 'ave_value': -6.16500228653083} step=338
2022-04-17 14:52.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.36 [info     ] CQL_20220417145220: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003491153378458418, 'time_algorithm_update': 0.030628167665921725, 'temp_loss': 4.963630966886261, 'temp': 0.9841855193030905, 'alpha_loss': -18.283104123448478, 'alpha': 1.0419955310031508, 'critic_loss': 105.86324450250208, 'actor_loss': 3.93120924819856, 'time_step': 0.03105372931124896, 'td_error': 1.2902224200965744, 'init_value': -9.61863899230957, 'ave_value': -8.170615194498955} step=507
2022-04-17 14:52.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.42 [info     ] CQL_20220417145220: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003531261308658758, 'time_algorithm_update': 0.03096814691667726, 'temp_loss': 4.935084413494584, 'temp': 0.9780623651820527, 'alpha_loss': -18.65919739536985, 'alpha': 1.0593227606553297, 'critic_loss': 151.8247724905522, 'actor_loss': 4.963487997562928, 'time_step': 0.03139742732753415, 'td_error': 1.188560491862169, 'init_value': -9.737343788146973, 'ave_value': -8.197885774964686} step=676
2022-04-17 14:52.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.47 [info     ] CQL_20220417145220: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00034715014801928277, 'time_algorithm_update': 0.03048289174864278, 'temp_loss': 4.904856038516795, 'temp': 0.97201139581274, 'alpha_loss': -18.950200684677213, 'alpha': 1.0773767247002506, 'critic_loss': 213.7057376839, 'actor_loss': 5.613106377731413, 'time_step': 0.03090500831604004, 'td_error': 1.2099364164856332, 'init_value': -10.564616203308105, 'ave_value': -9.043492204111976} step=845
2022-04-17 14:52.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:52.52 [info     ] CQL_20220417145220: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00035259709555721845, 'time_algorithm_update': 0.030227316907171668, 'temp_loss': 4.873612812990268, 'temp': 0.966022311230383, 'alpha_loss': -19.271166084786138, 'alpha': 1.0959980106918064, 'critic_loss': 291.2094783444376, 'actor_loss': 5.791837243638801, 'time_step': 0.030653715133666992, 'td_error': 1.2207716698536584, 'init_value': -10.286447525024414, 'ave_value': -8.804127677174302} step=1014
2022-04-17 14:52.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145220/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:52.53 [info     ] FQE_20220417145253: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00017462457929338728, 'time_algorithm_update': 0.004626794294877486, 'loss': 0.011362369420224583, 'time_step': 0.004886515728839032, 'init_value': -0.27760809659957886, 'ave_value': -0.26455577135857966, 'soft_opc': nan} step=77




2022-04-17 14:52.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.53 [info     ] FQE_20220417145253: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00014972067498541497, 'time_algorithm_update': 0.004034451075962612, 'loss': 0.008289820865377204, 'time_step': 0.004252882746907024, 'init_value': -0.4307834506034851, 'ave_value': -0.37753344280561346, 'soft_opc': nan} step=154




2022-04-17 14:52.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.54 [info     ] FQE_20220417145253: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.0001738690710687018, 'time_algorithm_update': 0.004269962186937208, 'loss': 0.006394967067889966, 'time_step': 0.004534613002430309, 'init_value': -0.533098042011261, 'ave_value': -0.45242719990243246, 'soft_opc': nan} step=231




2022-04-17 14:52.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.54 [info     ] FQE_20220417145253: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016770115146389256, 'time_algorithm_update': 0.00406261852809361, 'loss': 0.005093670885574508, 'time_step': 0.0043181134508801745, 'init_value': -0.6542649269104004, 'ave_value': -0.5778944719616357, 'soft_opc': nan} step=308




2022-04-17 14:52.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.54 [info     ] FQE_20220417145253: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015889204941786728, 'time_algorithm_update': 0.0038293491710316052, 'loss': 0.004679883370635572, 'time_step': 0.004057017239657315, 'init_value': -0.6064252853393555, 'ave_value': -0.5069683378696576, 'soft_opc': nan} step=385




2022-04-17 14:52.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.55 [info     ] FQE_20220417145253: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00015908711916440494, 'time_algorithm_update': 0.004177288575605912, 'loss': 0.004576745842184339, 'time_step': 0.004435353464894481, 'init_value': -0.6673609018325806, 'ave_value': -0.5452979525937154, 'soft_opc': nan} step=462




2022-04-17 14:52.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.55 [info     ] FQE_20220417145253: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00015188192392324472, 'time_algorithm_update': 0.0038922421343914873, 'loss': 0.00420895145673837, 'time_step': 0.004119126827685864, 'init_value': -0.7152045369148254, 'ave_value': -0.5754881096382936, 'soft_opc': nan} step=539




2022-04-17 14:52.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.56 [info     ] FQE_20220417145253: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016691777613255884, 'time_algorithm_update': 0.004154490186022474, 'loss': 0.004009920777148241, 'time_step': 0.004407984869820731, 'init_value': -0.7759084701538086, 'ave_value': -0.6161953614554829, 'soft_opc': nan} step=616




2022-04-17 14:52.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.56 [info     ] FQE_20220417145253: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.000163490122014826, 'time_algorithm_update': 0.004196863669853706, 'loss': 0.0037357652463115656, 'time_step': 0.00443804728520381, 'init_value': -0.7616286277770996, 'ave_value': -0.6025422706730194, 'soft_opc': nan} step=693




2022-04-17 14:52.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.56 [info     ] FQE_20220417145253: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017119073248528816, 'time_algorithm_update': 0.004464050392051796, 'loss': 0.003877050346126417, 'time_step': 0.004709794923856661, 'init_value': -0.788424551486969, 'ave_value': -0.6219860120198211, 'soft_opc': nan} step=770




2022-04-17 14:52.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145253/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:52.57 [info     ] FQE_20220417145256: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015780832860376928, 'time_algorithm_update': 0.003937414714268276, 'loss': 0.011514762924476103, 'time_step': 0.004170293931837206, 'init_value': 0.058163903653621674, 'ave_value': 0.10476166860007488, 'soft_opc': nan} step=77




2022-04-17 14:52.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.57 [info     ] FQE_20220417145256: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016288014201374797, 'time_algorithm_update': 0.004035104404796253, 'loss': 0.007692883861577743, 'time_step': 0.0042795360862434685, 'init_value': -0.013794975355267525, 'ave_value': 0.06148148483789719, 'soft_opc': nan} step=154




2022-04-17 14:52.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.58 [info     ] FQE_20220417145256: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.0001586938833261465, 'time_algorithm_update': 0.0038918458022080458, 'loss': 0.005535912137575351, 'time_step': 0.004123929259064909, 'init_value': -0.054747577756643295, 'ave_value': 0.038460989818312565, 'soft_opc': nan} step=231




2022-04-17 14:52.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.58 [info     ] FQE_20220417145256: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001668558492288961, 'time_algorithm_update': 0.004226796038739093, 'loss': 0.0041508968562710205, 'time_step': 0.004479928450150924, 'init_value': -0.06836238503456116, 'ave_value': 0.02943454844591854, 'soft_opc': nan} step=308




2022-04-17 14:52.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.58 [info     ] FQE_20220417145256: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016648738415210278, 'time_algorithm_update': 0.0041444858947357575, 'loss': 0.0035579130522205263, 'time_step': 0.004384446453738523, 'init_value': -0.05802325904369354, 'ave_value': 0.03351916517928109, 'soft_opc': nan} step=385




2022-04-17 14:52.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.59 [info     ] FQE_20220417145256: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001587031723616959, 'time_algorithm_update': 0.004125016076224191, 'loss': 0.0032564598035865595, 'time_step': 0.00435443977256874, 'init_value': -0.057458776980638504, 'ave_value': 0.027482298427672535, 'soft_opc': nan} step=462




2022-04-17 14:52.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:52.59 [info     ] FQE_20220417145256: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016286775663301544, 'time_algorithm_update': 0.004299838821609299, 'loss': 0.0028360501693731003, 'time_step': 0.004542852376962637, 'init_value': -0.05841498821973801, 'ave_value': 0.029700845199729407, 'soft_opc': nan} step=539




2022-04-17 14:52.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.00 [info     ] FQE_20220417145256: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.000163874068817535, 'time_algorithm_update': 0.004184348242623466, 'loss': 0.002518468899289509, 'time_step': 0.004421881267002651, 'init_value': -0.05852570757269859, 'ave_value': 0.015364673683369481, 'soft_opc': nan} step=616




2022-04-17 14:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.00 [info     ] FQE_20220417145256: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015477081397911172, 'time_algorithm_update': 0.003873930348978414, 'loss': 0.002229111648600113, 'time_step': 0.004095706072720614, 'init_value': -0.059325702488422394, 'ave_value': 0.009827486083314225, 'soft_opc': nan} step=693




2022-04-17 14:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.00 [info     ] FQE_20220417145256: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001680881946117847, 'time_algorithm_update': 0.004432969279103465, 'loss': 0.0022181074018590152, 'time_step': 0.004684008561171495, 'init_value': -0.06620112806558609, 'ave_value': -0.006529154025253143, 'soft_opc': nan} step=770




2022-04-17 14:53.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145256/model_770.pt
search iteration:  3
using hyper params:  [0.007027836354118029, 0.008555952580378579, 3.9116615445711005e-05, 3]
2022-04-17 14:53.00 [debug    ] RoundIterator is selected.
2022-04-17 14:53.00 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145300
2022-04-17 14:53.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:53.00 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:53.00 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145300/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.007027836354118029, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.06 [info     ] CQL_20220417145300: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003212076672435512, 'time_algorithm_update': 0.030332120917957915, 'temp_loss': 4.918619205260418, 'temp': 0.9965800720559069, 'alpha_loss': -18.96918927706205, 'alpha': 1.0086819201531494, 'critic_loss': 48.37380864070012, 'actor_loss': -0.1041181427178651, 'time_step': 0.030727890116223216, 'td_error': 0.9855777723996442, 'init_value': -3.4286160469055176, 'ave_value': -2.937846561788439} step=169
2022-04-17 14:53.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.11 [info     ] CQL_20220417145300: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003175594397550504, 'time_algorithm_update': 0.03135597635303023, 'temp_loss': 4.990003532206519, 'temp': 0.989902682205629, 'alpha_loss': -18.116621277036046, 'alpha': 1.0252977256944193, 'critic_loss': 54.02530758197491, 'actor_loss': 0.8045903232455607, 'time_step': 0.031749100374752245, 'td_error': 0.8165522626881645, 'init_value': -4.2464799880981445, 'ave_value': -3.6068463394300894} step=338
2022-04-17 14:53.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.17 [info     ] CQL_20220417145300: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003282995619012054, 'time_algorithm_update': 0.030083375569631362, 'temp_loss': 4.9588820976618475, 'temp': 0.9833593118120227, 'alpha_loss': -18.36988157915646, 'alpha': 1.042465366555389, 'critic_loss': 76.71705264029418, 'actor_loss': 1.3164934032767481, 'time_step': 0.03048993567743245, 'td_error': 0.9979308612732931, 'init_value': -4.906195640563965, 'ave_value': -4.087823746098323} step=507
2022-04-17 14:53.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.22 [info     ] CQL_20220417145300: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003092966136142347, 'time_algorithm_update': 0.03032873650274333, 'temp_loss': 4.928683255551129, 'temp': 0.9768987456722372, 'alpha_loss': -18.649673394197542, 'alpha': 1.0602488179178633, 'critic_loss': 112.45295417379346, 'actor_loss': 1.563905407691143, 'time_step': 0.03071766633253831, 'td_error': 0.9728627675910274, 'init_value': -5.130832195281982, 'ave_value': -4.224039000436529} step=676
2022-04-17 14:53.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.27 [info     ] CQL_20220417145300: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00031812797636675413, 'time_algorithm_update': 0.030447477419700848, 'temp_loss': 4.899106378386007, 'temp': 0.9705069837485545, 'alpha_loss': -18.880674046172192, 'alpha': 1.0785358474099425, 'critic_loss': 164.12820651263175, 'actor_loss': 1.4991092064676905, 'time_step': 0.030844282116410295, 'td_error': 0.9120494870539169, 'init_value': -4.7114434242248535, 'ave_value': -3.9383518288425496} step=845
2022-04-17 14:53.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.33 [info     ] CQL_20220417145300: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003128601954533504, 'time_algorithm_update': 0.030528869854627983, 'temp_loss': 4.867094195100683, 'temp': 0.9641792619016749, 'alpha_loss': -19.213308526214057, 'alpha': 1.0973231594943436, 'critic_loss': 232.2379570233046, 'actor_loss': 1.072372353641239, 'time_step': 0.03091878298471665, 'td_error': 0.9984382450389475, 'init_value': -3.67872953414917, 'ave_value': -3.141775959209279} step=1014
2022-04-17 14:53.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145300/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-0

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:53.33 [info     ] FQE_20220417145333: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00014386858258928572, 'time_algorithm_update': 0.003610087679578112, 'loss': 0.012893218528963142, 'time_step': 0.003824447656606699, 'init_value': -0.5393759608268738, 'ave_value': -0.4954762118490967, 'soft_opc': nan} step=77




2022-04-17 14:53.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.34 [info     ] FQE_20220417145333: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016834828760716822, 'time_algorithm_update': 0.004054273877825055, 'loss': 0.008636470678468029, 'time_step': 0.004317779045600396, 'init_value': -0.6543323397636414, 'ave_value': -0.5746992039250898, 'soft_opc': nan} step=154




2022-04-17 14:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.34 [info     ] FQE_20220417145333: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.0001523216049392502, 'time_algorithm_update': 0.003983677207649528, 'loss': 0.006258805996024764, 'time_step': 0.00420553343636649, 'init_value': -0.7359147667884827, 'ave_value': -0.6148766619665129, 'soft_opc': nan} step=231




2022-04-17 14:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.34 [info     ] FQE_20220417145333: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016139389632584213, 'time_algorithm_update': 0.004005150361494584, 'loss': 0.004931264992367911, 'time_step': 0.004239469379573674, 'init_value': -0.76890629529953, 'ave_value': -0.6428677789262823, 'soft_opc': nan} step=308




2022-04-17 14:53.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.35 [info     ] FQE_20220417145333: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017561231340680803, 'time_algorithm_update': 0.004120191970428863, 'loss': 0.004635683285048256, 'time_step': 0.004366292581929789, 'init_value': -0.756876528263092, 'ave_value': -0.6076750671809859, 'soft_opc': nan} step=385




2022-04-17 14:53.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.35 [info     ] FQE_20220417145333: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001660972446590275, 'time_algorithm_update': 0.004116643558848988, 'loss': 0.004609208953167711, 'time_step': 0.0043684321564513365, 'init_value': -0.8190247416496277, 'ave_value': -0.6362665334397608, 'soft_opc': nan} step=462




2022-04-17 14:53.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.36 [info     ] FQE_20220417145333: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00015125336585106788, 'time_algorithm_update': 0.0037207108039360543, 'loss': 0.0042446936305322045, 'time_step': 0.003941857969606078, 'init_value': -0.8981397151947021, 'ave_value': -0.6763855908905063, 'soft_opc': nan} step=539




2022-04-17 14:53.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.36 [info     ] FQE_20220417145333: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016903257989264153, 'time_algorithm_update': 0.004108896503200779, 'loss': 0.004206573747872532, 'time_step': 0.004353705938760336, 'init_value': -0.9458370208740234, 'ave_value': -0.6953956704687427, 'soft_opc': nan} step=616




2022-04-17 14:53.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.36 [info     ] FQE_20220417145333: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016769186242834315, 'time_algorithm_update': 0.004314552653919567, 'loss': 0.004082423371758747, 'time_step': 0.004576169051133193, 'init_value': -0.9381764531135559, 'ave_value': -0.6806160912454665, 'soft_opc': nan} step=693




2022-04-17 14:53.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:53.37 [info     ] FQE_20220417145333: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00015770305286754262, 'time_algorithm_update': 0.003930884522277039, 'loss': 0.004167664608639943, 'time_step': 0.004163423141875825, 'init_value': -1.0539460182189941, 'ave_value': -0.7499303837482993, 'soft_opc': nan} step=770




2022-04-17 14:53.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145333/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:53.37 [info     ] FQE_20220417145337: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.0001662400635805997, 'time_algorithm_update': 0.004134188998829235, 'loss': 0.009715322595597669, 'time_step': 0.004369632764296098, 'init_value': -0.4744076430797577, 'ave_value': -0.39119754496726905, 'soft_opc': nan} step=88




2022-04-17 14:53.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.38 [info     ] FQE_20220417145337: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.00016085938973860308, 'time_algorithm_update': 0.0038330582055178556, 'loss': 0.006077321534658867, 'time_step': 0.004076104272495617, 'init_value': -0.5335708856582642, 'ave_value': -0.4249556193883355, 'soft_opc': nan} step=176




2022-04-17 14:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.38 [info     ] FQE_20220417145337: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.00015669248320839623, 'time_algorithm_update': 0.004083636132153598, 'loss': 0.004395146650495008, 'time_step': 0.004317196932705966, 'init_value': -0.5744309425354004, 'ave_value': -0.45758156134738576, 'soft_opc': nan} step=264




2022-04-17 14:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.38 [info     ] FQE_20220417145337: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00016411868008700284, 'time_algorithm_update': 0.004031075672669845, 'loss': 0.0035544497685887936, 'time_step': 0.004290477796034379, 'init_value': -0.5997427701950073, 'ave_value': -0.4729382339227307, 'soft_opc': nan} step=352




2022-04-17 14:53.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.39 [info     ] FQE_20220417145337: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.0001755844463001598, 'time_algorithm_update': 0.004247210242531516, 'loss': 0.0030993485124781728, 'time_step': 0.004506162621758201, 'init_value': -0.6084730625152588, 'ave_value': -0.48199822605998666, 'soft_opc': nan} step=440




2022-04-17 14:53.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.39 [info     ] FQE_20220417145337: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.000158182599327781, 'time_algorithm_update': 0.003910980441353538, 'loss': 0.002827104486641474, 'time_step': 0.004138532009991733, 'init_value': -0.6067150831222534, 'ave_value': -0.4795580039153228, 'soft_opc': nan} step=528




2022-04-17 14:53.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.40 [info     ] FQE_20220417145337: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.00016415119171142578, 'time_algorithm_update': 0.004182503982023759, 'loss': 0.0025994873404587534, 'time_step': 0.004438411105762829, 'init_value': -0.6325485110282898, 'ave_value': -0.4977187200038283, 'soft_opc': nan} step=616




2022-04-17 14:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.40 [info     ] FQE_20220417145337: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00017384507439353249, 'time_algorithm_update': 0.0042341568253257056, 'loss': 0.0023448625090002843, 'time_step': 0.004505886272950606, 'init_value': -0.6759483814239502, 'ave_value': -0.5368299855439512, 'soft_opc': nan} step=704




2022-04-17 14:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.41 [info     ] FQE_20220417145337: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.00018421628258445045, 'time_algorithm_update': 0.004358348521319303, 'loss': 0.0023143115659794685, 'time_step': 0.004659016023982655, 'init_value': -0.6455778479576111, 'ave_value': -0.5024729495649939, 'soft_opc': nan} step=792




2022-04-17 14:53.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:53.41 [info     ] FQE_20220417145337: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.00016625090078874067, 'time_algorithm_update': 0.004264958880164407, 'loss': 0.002181664775972339, 'time_step': 0.004533708095550537, 'init_value': -0.6739041805267334, 'ave_value': -0.5212965902981458, 'soft_opc': nan} step=880




2022-04-17 14:53.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145337/model_880.pt
search iteration:  4
using hyper params:  [0.007182181387981417, 0.004557980807771779, 6.895134917633803e-05, 5]
2022-04-17 14:53.41 [debug    ] RoundIterator is selected.
2022-04-17 14:53.41 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145341
2022-04-17 14:53.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:53.41 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:53.41 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145341/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.007182181387981417, 'actor_optim_factory': {'optim_cl

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.46 [info     ] CQL_20220417145341: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003502763940032417, 'time_algorithm_update': 0.03046001202961397, 'temp_loss': 4.884651683491363, 'temp': 0.9939450060827493, 'alpha_loss': -19.745272066466203, 'alpha': 1.0089251007554094, 'critic_loss': 91.40494144597703, 'actor_loss': 0.22904148032150326, 'time_step': 0.030892984401544876, 'td_error': 1.8918977684675478, 'init_value': -6.009394645690918, 'ave_value': -5.034387720909205} step=169
2022-04-17 14:53.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.52 [info     ] CQL_20220417145341: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00034746333692200793, 'time_algorithm_update': 0.03027702506477311, 'temp_loss': 4.927338140250663, 'temp': 0.9822346183675281, 'alpha_loss': -19.452532119299534, 'alpha': 1.0263289417740862, 'critic_loss': 73.66517332178601, 'actor_loss': 3.0538567417472073, 'time_step': 0.03071047568462304, 'td_error': 0.8384417715317011, 'init_value': -6.9583563804626465, 'ave_value': -5.865333044668576} step=338
2022-04-17 14:53.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:53.57 [info     ] CQL_20220417145341: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003334558927095853, 'time_algorithm_update': 0.03008370850918561, 'temp_loss': 4.882818766599576, 'temp': 0.9708729746073661, 'alpha_loss': -18.796107263960078, 'alpha': 1.0433543969893597, 'critic_loss': 101.84208814632258, 'actor_loss': 4.5656472479803325, 'time_step': 0.03049625306439823, 'td_error': 1.2159809903812515, 'init_value': -9.6256742477417, 'ave_value': -8.12295322263563} step=507
2022-04-17 14:53.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.03 [info     ] CQL_20220417145341: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00034582826512805103, 'time_algorithm_update': 0.031053452802127634, 'temp_loss': 4.835888357557488, 'temp': 0.9597131038558554, 'alpha_loss': -19.190848390026204, 'alpha': 1.0607987331920827, 'critic_loss': 140.00441796116576, 'actor_loss': 5.915707765951664, 'time_step': 0.031480152931439104, 'td_error': 1.1154493688370783, 'init_value': -10.52656078338623, 'ave_value': -8.804182092370214} step=676
2022-04-17 14:54.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.08 [info     ] CQL_20220417145341: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00034613863250913, 'time_algorithm_update': 0.03035185322959042, 'temp_loss': 4.784573140228994, 'temp': 0.9487475103880527, 'alpha_loss': -19.063259768062796, 'alpha': 1.0788092309906638, 'critic_loss': 191.09416289019163, 'actor_loss': 6.935782387411806, 'time_step': 0.030778196436413646, 'td_error': 1.2477559646626104, 'init_value': -12.088884353637695, 'ave_value': -10.197012996093648} step=845
2022-04-17 14:54.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.13 [info     ] CQL_20220417145341: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003453584817739633, 'time_algorithm_update': 0.030264429792144595, 'temp_loss': 4.735029536591479, 'temp': 0.937963172881561, 'alpha_loss': -19.189642923118093, 'alpha': 1.0970515506507377, 'critic_loss': 255.40332410462509, 'actor_loss': 7.528410412150727, 'time_step': 0.03068696958778878, 'td_error': 1.2227656066950068, 'init_value': -12.362313270568848, 'ave_value': -10.495159814228883} step=1014
2022-04-17 14:54.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145341/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:54.14 [info     ] FQE_20220417145413: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.00016139312223954633, 'time_algorithm_update': 0.003928669474341653, 'loss': 0.011063593646130439, 'time_step': 0.00416288050738248, 'init_value': -0.5772755742073059, 'ave_value': -0.5603886676264239, 'soft_opc': nan} step=88




2022-04-17 14:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.14 [info     ] FQE_20220417145413: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.00016644326123324308, 'time_algorithm_update': 0.004283352331681685, 'loss': 0.00797593380345709, 'time_step': 0.004526243968443437, 'init_value': -0.6970099210739136, 'ave_value': -0.638821048162005, 'soft_opc': nan} step=176




2022-04-17 14:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.15 [info     ] FQE_20220417145413: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.00016334923830899325, 'time_algorithm_update': 0.004307830875570124, 'loss': 0.006369644656925547, 'time_step': 0.00455601377920671, 'init_value': -0.7945083379745483, 'ave_value': -0.6960040196910635, 'soft_opc': nan} step=264




2022-04-17 14:54.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.15 [info     ] FQE_20220417145413: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00018188899213617498, 'time_algorithm_update': 0.004432981664484198, 'loss': 0.005335792994380675, 'time_step': 0.0046940174969759855, 'init_value': -0.8810534477233887, 'ave_value': -0.7432000001003076, 'soft_opc': nan} step=352




2022-04-17 14:54.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.16 [info     ] FQE_20220417145413: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.00017446821386163884, 'time_algorithm_update': 0.00424957275390625, 'loss': 0.0047010754960038785, 'time_step': 0.004518993876197122, 'init_value': -0.96419358253479, 'ave_value': -0.791623713545971, 'soft_opc': nan} step=440




2022-04-17 14:54.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.16 [info     ] FQE_20220417145413: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.00015835599465803668, 'time_algorithm_update': 0.004069108854640614, 'loss': 0.00452151666236618, 'time_step': 0.004328107292001898, 'init_value': -1.0362889766693115, 'ave_value': -0.8378217707614641, 'soft_opc': nan} step=528




2022-04-17 14:54.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.16 [info     ] FQE_20220417145413: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.0001675161448392001, 'time_algorithm_update': 0.004224611954255538, 'loss': 0.004167235228868032, 'time_step': 0.00448436357758262, 'init_value': -1.0767444372177124, 'ave_value': -0.8626085538316417, 'soft_opc': nan} step=616




2022-04-17 14:54.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.17 [info     ] FQE_20220417145413: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00016539205204356801, 'time_algorithm_update': 0.0041720975529063835, 'loss': 0.0039978863629089164, 'time_step': 0.004427015781402588, 'init_value': -1.0911545753479004, 'ave_value': -0.8684211996761528, 'soft_opc': nan} step=704




2022-04-17 14:54.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.17 [info     ] FQE_20220417145413: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.00017509948123585093, 'time_algorithm_update': 0.00424869494004683, 'loss': 0.0039878001473632385, 'time_step': 0.004509606144644998, 'init_value': -1.0948054790496826, 'ave_value': -0.8522495255008474, 'soft_opc': nan} step=792




2022-04-17 14:54.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:54.18 [info     ] FQE_20220417145413: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.000161301005970348, 'time_algorithm_update': 0.003824496811086481, 'loss': 0.004138039074446583, 'time_step': 0.004057765007019043, 'init_value': -1.1420700550079346, 'ave_value': -0.8607309345860739, 'soft_opc': nan} step=880




2022-04-17 14:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145413/model_880.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:54.18 [info     ] FQE_20220417145418: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00017538937655362216, 'time_algorithm_update': 0.0044817986426415385, 'loss': 0.010477723662632627, 'time_step': 0.0047321814995307425, 'init_value': -0.08989398926496506, 'ave_value': -0.07391464091031938, 'soft_opc': nan} step=77




2022-04-17 14:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.19 [info     ] FQE_20220417145418: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017044141695096894, 'time_algorithm_update': 0.004122891983428559, 'loss': 0.006470685818704304, 'time_step': 0.004387722386942281, 'init_value': -0.13044501841068268, 'ave_value': -0.08680634622625705, 'soft_opc': nan} step=154




2022-04-17 14:54.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.19 [info     ] FQE_20220417145418: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015192836910099178, 'time_algorithm_update': 0.0038328418483981838, 'loss': 0.004540790552757196, 'time_step': 0.0040467900115174135, 'init_value': -0.1311371624469757, 'ave_value': -0.07798107502319188, 'soft_opc': nan} step=231




2022-04-17 14:54.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.19 [info     ] FQE_20220417145418: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017668984153053978, 'time_algorithm_update': 0.004429040017066064, 'loss': 0.003698791402948464, 'time_step': 0.004722666430782962, 'init_value': -0.1547161191701889, 'ave_value': -0.0970802635910946, 'soft_opc': nan} step=308




2022-04-17 14:54.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.20 [info     ] FQE_20220417145418: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015561921255929128, 'time_algorithm_update': 0.0041459566586977475, 'loss': 0.003428038261080911, 'time_step': 0.004374782760421951, 'init_value': -0.15080469846725464, 'ave_value': -0.09457502440289334, 'soft_opc': nan} step=385




2022-04-17 14:54.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.20 [info     ] FQE_20220417145418: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00018073366833971694, 'time_algorithm_update': 0.004453290592540394, 'loss': 0.0030770358318075925, 'time_step': 0.004714337262240323, 'init_value': -0.15610799193382263, 'ave_value': -0.09026418100069235, 'soft_opc': nan} step=462




2022-04-17 14:54.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.21 [info     ] FQE_20220417145418: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017473295137479706, 'time_algorithm_update': 0.004345451082502093, 'loss': 0.0028362631564959884, 'time_step': 0.004623617444719587, 'init_value': -0.16167567670345306, 'ave_value': -0.08811082792267003, 'soft_opc': nan} step=539




2022-04-17 14:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.21 [info     ] FQE_20220417145418: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017352228040819044, 'time_algorithm_update': 0.004292119633067738, 'loss': 0.002603402164728417, 'time_step': 0.004558649930087003, 'init_value': -0.18237127363681793, 'ave_value': -0.10705253487822038, 'soft_opc': nan} step=616




2022-04-17 14:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.21 [info     ] FQE_20220417145418: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016655860009131494, 'time_algorithm_update': 0.004060816455197024, 'loss': 0.002376175972379067, 'time_step': 0.00431035091350605, 'init_value': -0.1941472291946411, 'ave_value': -0.11250642380123405, 'soft_opc': nan} step=693




2022-04-17 14:54.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.22 [info     ] FQE_20220417145418: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017392480528199828, 'time_algorithm_update': 0.004293190968501104, 'loss': 0.0024444993491611116, 'time_step': 0.004546639207121614, 'init_value': -0.21296115219593048, 'ave_value': -0.12249684767873102, 'soft_opc': nan} step=770




2022-04-17 14:54.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145418/model_770.pt
search iteration:  5
using hyper params:  [0.003666988190299397, 0.008943104026615807, 1.1815015619882159e-05, 3]
2022-04-17 14:54.22 [debug    ] RoundIterator is selected.
2022-04-17 14:54.22 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145422
2022-04-17 14:54.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:54.22 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:54.22 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145422/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.003666988190299397, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.27 [info     ] CQL_20220417145422: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00033252620132717155, 'time_algorithm_update': 0.03192512382417036, 'temp_loss': 4.830894681828967, 'temp': 0.9989460085976053, 'alpha_loss': -18.485825374986998, 'alpha': 1.0086894479729014, 'critic_loss': 49.4807208811743, 'actor_loss': -0.07487990115300791, 'time_step': 0.032342455090855704, 'td_error': 1.0292002890110559, 'init_value': -3.0206236839294434, 'ave_value': -2.5297164931705405} step=169
2022-04-17 14:54.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.33 [info     ] CQL_20220417145422: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.000319435751649755, 'time_algorithm_update': 0.0311851148774638, 'temp_loss': 5.02616246917544, 'temp': 0.9968792990114562, 'alpha_loss': -17.84821722493369, 'alpha': 1.0254846804240751, 'critic_loss': 53.56650601900541, 'actor_loss': 0.48165821850564355, 'time_step': 0.031584913208639834, 'td_error': 1.1153170047959304, 'init_value': -4.240959644317627, 'ave_value': -3.5570196583518037} step=338
2022-04-17 14:54.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.39 [info     ] CQL_20220417145422: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00033020832129484096, 'time_algorithm_update': 0.03120068121238573, 'temp_loss': 5.0221759609922145, 'temp': 0.9948583245277405, 'alpha_loss': -18.167904272587343, 'alpha': 1.0427674457166323, 'critic_loss': 77.66639905850563, 'actor_loss': 0.9616172592315448, 'time_step': 0.03160621005402514, 'td_error': 1.0510007404071953, 'init_value': -4.371805667877197, 'ave_value': -3.626089461982787} step=507
2022-04-17 14:54.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.44 [info     ] CQL_20220417145422: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00032464286984776607, 'time_algorithm_update': 0.03163278737717126, 'temp_loss': 5.009804099269167, 'temp': 0.9928573172473343, 'alpha_loss': -18.466721608088566, 'alpha': 1.0606802433905518, 'critic_loss': 114.67575330564962, 'actor_loss': 1.1536822809270149, 'time_step': 0.032035394533146064, 'td_error': 1.0443427844368132, 'init_value': -4.221829414367676, 'ave_value': -3.4932430591991355} step=676
2022-04-17 14:54.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.50 [info     ] CQL_20220417145422: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003304128816141885, 'time_algorithm_update': 0.03132274447108162, 'temp_loss': 5.001340042204546, 'temp': 0.9908694751869292, 'alpha_loss': -18.789294508081923, 'alpha': 1.0791051895660762, 'critic_loss': 168.09736588156434, 'actor_loss': 1.0902965171097299, 'time_step': 0.03173469227446607, 'td_error': 1.0227189645226737, 'init_value': -3.8297314643859863, 'ave_value': -3.2660148104891045} step=845
2022-04-17 14:54.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:54.55 [info     ] CQL_20220417145422: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003349498884212336, 'time_algorithm_update': 0.031332333412396135, 'temp_loss': 4.992567897548337, 'temp': 0.988890492704493, 'alpha_loss': -19.110412834664068, 'alpha': 1.0980235091327915, 'critic_loss': 235.25742434891018, 'actor_loss': 0.7700851146991436, 'time_step': 0.03175043918677336, 'td_error': 1.0304126460124405, 'init_value': -3.117426633834839, 'ave_value': -2.7825590773638305} step=1014
2022-04-17 14:54.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145422/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:54.56 [info     ] FQE_20220417145455: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001751107054871398, 'time_algorithm_update': 0.00422336838462136, 'loss': 0.01251372777820601, 'time_step': 0.00448103074903612, 'init_value': -0.3135446310043335, 'ave_value': -0.27032499582515107, 'soft_opc': nan} step=77




2022-04-17 14:54.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.56 [info     ] FQE_20220417145455: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016105329835569703, 'time_algorithm_update': 0.003958250021005606, 'loss': 0.009098907098077334, 'time_step': 0.004202833423366794, 'init_value': -0.43744564056396484, 'ave_value': -0.3617613283766283, 'soft_opc': nan} step=154




2022-04-17 14:54.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.56 [info     ] FQE_20220417145455: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016780023450975294, 'time_algorithm_update': 0.004054394635287198, 'loss': 0.006882210603806299, 'time_step': 0.004319259098597935, 'init_value': -0.5323688983917236, 'ave_value': -0.421129953787402, 'soft_opc': nan} step=231




2022-04-17 14:54.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.57 [info     ] FQE_20220417145455: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016386168343680247, 'time_algorithm_update': 0.004146637854638037, 'loss': 0.00514161034620234, 'time_step': 0.004400525774274554, 'init_value': -0.6214957237243652, 'ave_value': -0.508370795147913, 'soft_opc': nan} step=308




2022-04-17 14:54.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.57 [info     ] FQE_20220417145455: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017070150994635247, 'time_algorithm_update': 0.004108466111220323, 'loss': 0.0045078536949132554, 'time_step': 0.004356266616226791, 'init_value': -0.6372256278991699, 'ave_value': -0.48643048628493474, 'soft_opc': nan} step=385




2022-04-17 14:54.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.58 [info     ] FQE_20220417145455: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016527980953067928, 'time_algorithm_update': 0.0040281809769667585, 'loss': 0.0043452332769943905, 'time_step': 0.004284273494373669, 'init_value': -0.6853955388069153, 'ave_value': -0.506746781301928, 'soft_opc': nan} step=462




2022-04-17 14:54.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.58 [info     ] FQE_20220417145455: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017712023351099584, 'time_algorithm_update': 0.004302836083746576, 'loss': 0.003983442874189901, 'time_step': 0.004558776880239511, 'init_value': -0.7639731764793396, 'ave_value': -0.5524786181143813, 'soft_opc': nan} step=539




2022-04-17 14:54.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.58 [info     ] FQE_20220417145455: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016967971603591722, 'time_algorithm_update': 0.004235533924845906, 'loss': 0.003716112525722423, 'time_step': 0.004487322522448255, 'init_value': -0.8135563731193542, 'ave_value': -0.5838335401772916, 'soft_opc': nan} step=616




2022-04-17 14:54.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.59 [info     ] FQE_20220417145455: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001858116744400619, 'time_algorithm_update': 0.004422054662332906, 'loss': 0.003514233373359523, 'time_step': 0.004679311405528675, 'init_value': -0.8053632378578186, 'ave_value': -0.5634679645299911, 'soft_opc': nan} step=693




2022-04-17 14:54.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:54.59 [info     ] FQE_20220417145455: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016529529125659497, 'time_algorithm_update': 0.004363982708423169, 'loss': 0.003587082370075506, 'time_step': 0.00463120968310864, 'init_value': -0.8638213276863098, 'ave_value': -0.5941805709489025, 'soft_opc': nan} step=770




2022-04-17 14:54.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145455/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:55.00 [info     ] FQE_20220417145459: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016626135095373378, 'time_algorithm_update': 0.004247408408623237, 'loss': 0.010163402983120509, 'time_step': 0.004492347890680487, 'init_value': -0.24345919489860535, 'ave_value': -0.2577559354824123, 'soft_opc': nan} step=77




2022-04-17 14:55.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.00 [info     ] FQE_20220417145459: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016689300537109375, 'time_algorithm_update': 0.004015972087909649, 'loss': 0.006675426996397701, 'time_step': 0.004262298732608943, 'init_value': -0.292735755443573, 'ave_value': -0.28142675387228394, 'soft_opc': nan} step=154




2022-04-17 14:55.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.00 [info     ] FQE_20220417145459: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.0001821641798143263, 'time_algorithm_update': 0.004262342081441508, 'loss': 0.004785863074848985, 'time_step': 0.0045526925619546466, 'init_value': -0.3130300045013428, 'ave_value': -0.2875111119049761, 'soft_opc': nan} step=231




2022-04-17 14:55.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.01 [info     ] FQE_20220417145459: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017629660569228134, 'time_algorithm_update': 0.004395392033960912, 'loss': 0.0038575336137520416, 'time_step': 0.004648326279281021, 'init_value': -0.30723169445991516, 'ave_value': -0.28086396616858406, 'soft_opc': nan} step=308




2022-04-17 14:55.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.01 [info     ] FQE_20220417145459: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001649051517635197, 'time_algorithm_update': 0.004073434061818309, 'loss': 0.0035610637255617745, 'time_step': 0.0043071802560385175, 'init_value': -0.3040394186973572, 'ave_value': -0.27432442510685134, 'soft_opc': nan} step=385




2022-04-17 14:55.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.02 [info     ] FQE_20220417145459: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001709461212158203, 'time_algorithm_update': 0.004429507565188718, 'loss': 0.0032417766807310575, 'time_step': 0.0047054445588743535, 'init_value': -0.3020250201225281, 'ave_value': -0.2707097436695754, 'soft_opc': nan} step=462




2022-04-17 14:55.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.02 [info     ] FQE_20220417145459: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016087990302544135, 'time_algorithm_update': 0.0038318169581425653, 'loss': 0.002907455517825755, 'time_step': 0.004073124427299995, 'init_value': -0.3577682077884674, 'ave_value': -0.32388275692256185, 'soft_opc': nan} step=539




2022-04-17 14:55.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.02 [info     ] FQE_20220417145459: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017321264588987672, 'time_algorithm_update': 0.003940511059451413, 'loss': 0.002665231584166116, 'time_step': 0.004198798885593167, 'init_value': -0.3497943878173828, 'ave_value': -0.3099198546596207, 'soft_opc': nan} step=616




2022-04-17 14:55.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.03 [info     ] FQE_20220417145459: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016409700567072088, 'time_algorithm_update': 0.004271244073843027, 'loss': 0.0023268046531850447, 'time_step': 0.004500076368257597, 'init_value': -0.3346620202064514, 'ave_value': -0.2989205917628767, 'soft_opc': nan} step=693




2022-04-17 14:55.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.03 [info     ] FQE_20220417145459: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00018377427930955763, 'time_algorithm_update': 0.004375278175651253, 'loss': 0.0022370144316636318, 'time_step': 0.004657116803255948, 'init_value': -0.35881006717681885, 'ave_value': -0.3205782275680486, 'soft_opc': nan} step=770




2022-04-17 14:55.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145459/model_770.pt
search iteration:  6
using hyper params:  [0.006706697006643553, 0.003445549415175851, 7.42674898216831e-05, 5]
2022-04-17 14:55.03 [debug    ] RoundIterator is selected.
2022-04-17 14:55.03 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145503
2022-04-17 14:55.03 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:55.03 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:55.03 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145503/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.006706697006643553, 'actor_optim_factory': {'optim_cls

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.09 [info     ] CQL_20220417145503: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003496175686988605, 'time_algorithm_update': 0.03141701433080188, 'temp_loss': 4.852262384087377, 'temp': 0.9934718855739345, 'alpha_loss': -20.13979047289967, 'alpha': 1.0089870656030417, 'critic_loss': 101.67770532461313, 'actor_loss': -0.31098451297840424, 'time_step': 0.03185053266717132, 'td_error': 1.562033648017791, 'init_value': -4.9767045974731445, 'ave_value': -4.145396838376114} step=169
2022-04-17 14:55.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.14 [info     ] CQL_20220417145503: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00034306176315397906, 'time_algorithm_update': 0.03097011210650382, 'temp_loss': 4.932432084393924, 'temp': 0.9808526592847158, 'alpha_loss': -18.975693945348617, 'alpha': 1.026459071057788, 'critic_loss': 73.59876016470102, 'actor_loss': 2.6712294887508867, 'time_step': 0.03139869419075328, 'td_error': 1.2628750860437359, 'init_value': -7.206223487854004, 'ave_value': -6.138461329582575} step=338
2022-04-17 14:55.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.20 [info     ] CQL_20220417145503: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003448209819003675, 'time_algorithm_update': 0.030386184094220222, 'temp_loss': 4.882019203795484, 'temp': 0.9685853039019207, 'alpha_loss': -18.461239267383103, 'alpha': 1.0430414606128218, 'critic_loss': 98.28292991141596, 'actor_loss': 4.289755606792382, 'time_step': 0.030817415587295442, 'td_error': 1.1021118618080636, 'init_value': -8.866594314575195, 'ave_value': -7.511478865726574} step=507
2022-04-17 14:55.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.25 [info     ] CQL_20220417145503: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00035291874902488213, 'time_algorithm_update': 0.0311734676361084, 'temp_loss': 4.823201523730035, 'temp': 0.9565942724780924, 'alpha_loss': -18.675432848507132, 'alpha': 1.0602492764151308, 'critic_loss': 131.50650096645018, 'actor_loss': 5.826073333356508, 'time_step': 0.031606270716740534, 'td_error': 1.35254331135383, 'init_value': -10.647809982299805, 'ave_value': -9.015775527159374} step=676
2022-04-17 14:55.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.31 [info     ] CQL_20220417145503: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003413857792961527, 'time_algorithm_update': 0.03081198839040903, 'temp_loss': 4.764315952210737, 'temp': 0.9448440494621999, 'alpha_loss': -18.955636289698134, 'alpha': 1.078131299047075, 'critic_loss': 175.35069139469306, 'actor_loss': 7.208725302882448, 'time_step': 0.03123345600782767, 'td_error': 1.2750941633997686, 'init_value': -11.840264320373535, 'ave_value': -10.224395123580555} step=845
2022-04-17 14:55.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.36 [info     ] CQL_20220417145503: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00034515251069379275, 'time_algorithm_update': 0.03094704052400307, 'temp_loss': 4.708403869493473, 'temp': 0.933308883178869, 'alpha_loss': -19.24846771059657, 'alpha': 1.0965931246266563, 'critic_loss': 227.99384772848094, 'actor_loss': 8.202785743058786, 'time_step': 0.031373413356803576, 'td_error': 1.4334219367245733, 'init_value': -13.242138862609863, 'ave_value': -11.402623826104241} step=1014
2022-04-17 14:55.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145503/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:55.37 [info     ] FQE_20220417145536: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00014949154544186281, 'time_algorithm_update': 0.0036264611529065417, 'loss': 0.012438569580773254, 'time_step': 0.003863043599314504, 'init_value': -0.13756071031093597, 'ave_value': -0.10542810659788482, 'soft_opc': nan} step=77




2022-04-17 14:55.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.37 [info     ] FQE_20220417145536: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017677653919566762, 'time_algorithm_update': 0.004489406362756506, 'loss': 0.007912203609072542, 'time_step': 0.0047667119410130885, 'init_value': -0.26257646083831787, 'ave_value': -0.1968230729771627, 'soft_opc': nan} step=154




2022-04-17 14:55.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.37 [info     ] FQE_20220417145536: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017332101797128653, 'time_algorithm_update': 0.004189670859993278, 'loss': 0.005706434485853299, 'time_step': 0.004423386090761656, 'init_value': -0.32609108090400696, 'ave_value': -0.23247509079205023, 'soft_opc': nan} step=231




2022-04-17 14:55.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.38 [info     ] FQE_20220417145536: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00018004318336387734, 'time_algorithm_update': 0.004456321914474685, 'loss': 0.004248726964843544, 'time_step': 0.00474948078006893, 'init_value': -0.34851399064064026, 'ave_value': -0.2499861550425087, 'soft_opc': nan} step=308




2022-04-17 14:55.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.38 [info     ] FQE_20220417145536: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017517263239080256, 'time_algorithm_update': 0.004321389384083934, 'loss': 0.0038763863645348844, 'time_step': 0.004580231456013469, 'init_value': -0.3417346775531769, 'ave_value': -0.2206493332487036, 'soft_opc': nan} step=385




2022-04-17 14:55.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.39 [info     ] FQE_20220417145536: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00017663720366242644, 'time_algorithm_update': 0.0043601401440508955, 'loss': 0.003636164173435468, 'time_step': 0.004635959476619572, 'init_value': -0.40771517157554626, 'ave_value': -0.25765090505382765, 'soft_opc': nan} step=462




2022-04-17 14:55.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.39 [info     ] FQE_20220417145536: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016758968303729962, 'time_algorithm_update': 0.004131434799788834, 'loss': 0.0034530386328697205, 'time_step': 0.00440521054453664, 'init_value': -0.4499400556087494, 'ave_value': -0.2736718853503499, 'soft_opc': nan} step=539




2022-04-17 14:55.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.39 [info     ] FQE_20220417145536: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017235495827414772, 'time_algorithm_update': 0.0041970184871128625, 'loss': 0.003331259966452981, 'time_step': 0.004473447799682617, 'init_value': -0.5070118308067322, 'ave_value': -0.3049608633559835, 'soft_opc': nan} step=616




2022-04-17 14:55.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.40 [info     ] FQE_20220417145536: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001873134018538834, 'time_algorithm_update': 0.004669499087643313, 'loss': 0.0030429606740181516, 'time_step': 0.004948445728846959, 'init_value': -0.5143969655036926, 'ave_value': -0.2994328975052413, 'soft_opc': nan} step=693




2022-04-17 14:55.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:55.40 [info     ] FQE_20220417145536: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001622980291193182, 'time_algorithm_update': 0.004374912806919643, 'loss': 0.0031615171170273383, 'time_step': 0.0046280297366055574, 'init_value': -0.5618544816970825, 'ave_value': -0.3175934016352167, 'soft_opc': nan} step=770




2022-04-17 14:55.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145536/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:55.41 [info     ] FQE_20220417145540: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.0001586648550900546, 'time_algorithm_update': 0.003909474069421942, 'loss': 0.009023644688899036, 'time_step': 0.00414696606722745, 'init_value': -0.147201806306839, 'ave_value': -0.11130704583415577, 'soft_opc': nan} step=88




2022-04-17 14:55.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.41 [info     ] FQE_20220417145540: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.0001624551686373624, 'time_algorithm_update': 0.00414872711355036, 'loss': 0.005217064325486056, 'time_step': 0.004396872086958451, 'init_value': -0.17839913070201874, 'ave_value': -0.12794707957403484, 'soft_opc': nan} step=176




2022-04-17 14:55.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.42 [info     ] FQE_20220417145540: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.00016244704073125666, 'time_algorithm_update': 0.004238643429496072, 'loss': 0.003867874228903516, 'time_step': 0.0044698335907676, 'init_value': -0.19999131560325623, 'ave_value': -0.1485463665210986, 'soft_opc': nan} step=264




2022-04-17 14:55.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.42 [info     ] FQE_20220417145540: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00018114935268055308, 'time_algorithm_update': 0.004563242197036743, 'loss': 0.0033304195425113326, 'time_step': 0.004818932576612992, 'init_value': -0.1999140977859497, 'ave_value': -0.1544150684062425, 'soft_opc': nan} step=352




2022-04-17 14:55.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.43 [info     ] FQE_20220417145540: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.00017686323686079547, 'time_algorithm_update': 0.0042937397956848145, 'loss': 0.0031060427466978913, 'time_step': 0.004565493627028031, 'init_value': -0.19094815850257874, 'ave_value': -0.15148355723232837, 'soft_opc': nan} step=440




2022-04-17 14:55.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.43 [info     ] FQE_20220417145540: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.00016431104053150523, 'time_algorithm_update': 0.004192839969288219, 'loss': 0.0029227439557540824, 'time_step': 0.004434428431771018, 'init_value': -0.22498653829097748, 'ave_value': -0.1801460915454873, 'soft_opc': nan} step=528




2022-04-17 14:55.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.43 [info     ] FQE_20220417145540: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.0001579116691242565, 'time_algorithm_update': 0.004075941714373502, 'loss': 0.0027610039476051247, 'time_step': 0.004302778027274392, 'init_value': -0.22222943603992462, 'ave_value': -0.18851145586116358, 'soft_opc': nan} step=616




2022-04-17 14:55.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.44 [info     ] FQE_20220417145540: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00015544891357421875, 'time_algorithm_update': 0.004157402298667214, 'loss': 0.0024898093834053725, 'time_step': 0.004377668554132635, 'init_value': -0.2858026921749115, 'ave_value': -0.25749184377155865, 'soft_opc': nan} step=704




2022-04-17 14:55.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.44 [info     ] FQE_20220417145540: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.0001664215868169611, 'time_algorithm_update': 0.004509495063261552, 'loss': 0.00236763710828117, 'time_step': 0.004743405363776467, 'init_value': -0.23992249369621277, 'ave_value': -0.21448091089993984, 'soft_opc': nan} step=792




2022-04-17 14:55.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:55.45 [info     ] FQE_20220417145540: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.00017284534194252708, 'time_algorithm_update': 0.004423325712030584, 'loss': 0.0023624816167284735, 'time_step': 0.004686859520998868, 'init_value': -0.2423781156539917, 'ave_value': -0.2243666360272212, 'soft_opc': nan} step=880




2022-04-17 14:55.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145540/model_880.pt
search iteration:  7
using hyper params:  [0.005785903170194118, 0.008901374810356485, 2.0433429704414475e-05, 1]
2022-04-17 14:55.45 [debug    ] RoundIterator is selected.
2022-04-17 14:55.45 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145545
2022-04-17 14:55.45 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:55.45 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:55.45 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145545/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.005785903170194118, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.50 [info     ] CQL_20220417145545: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003022569171070347, 'time_algorithm_update': 0.03154486311963324, 'temp_loss': 4.9034749911381645, 'temp': 0.9981902224072338, 'alpha_loss': -17.778707729994192, 'alpha': 1.008612684244235, 'critic_loss': 25.61571638118586, 'actor_loss': -1.8880255453332642, 'time_step': 0.031932479531101926, 'td_error': 1.0807851920960234, 'init_value': -0.2723446786403656, 'ave_value': -0.054040101049048404} step=169
2022-04-17 14:55.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:55.56 [info     ] CQL_20220417145545: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003003693191257454, 'time_algorithm_update': 0.031394253115682204, 'temp_loss': 5.015512497467403, 'temp': 0.9946637301755374, 'alpha_loss': -17.824545820789226, 'alpha': 1.0257737678888987, 'critic_loss': 24.93425036323141, 'actor_loss': -2.149307277781018, 'time_step': 0.031775545086380995, 'td_error': 1.0448838660929742, 'init_value': -0.5706638097763062, 'ave_value': -0.2209981747877759} step=338
2022-04-17 14:55.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.01 [info     ] CQL_20220417145545: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00030364369499612844, 'time_algorithm_update': 0.03146871166116387, 'temp_loss': 5.000898640536698, 'temp': 0.9912077251976058, 'alpha_loss': -18.134569935544707, 'alpha': 1.0434014169422128, 'critic_loss': 28.08797607196153, 'actor_loss': -2.1582698822021484, 'time_step': 0.031852359602437215, 'td_error': 1.0861757963105223, 'init_value': -0.7675862312316895, 'ave_value': -0.21725420222021974} step=507
2022-04-17 14:56.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.07 [info     ] CQL_20220417145545: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0002981600676767925, 'time_algorithm_update': 0.03109652191929563, 'temp_loss': 4.983864592377251, 'temp': 0.9877837254450872, 'alpha_loss': -18.491578299618332, 'alpha': 1.0615360722739315, 'critic_loss': 32.49598889096954, 'actor_loss': -2.037525414715152, 'time_step': 0.03147462980281672, 'td_error': 1.0409803838998375, 'init_value': -1.1951764822006226, 'ave_value': -0.5534776652711737} step=676
2022-04-17 14:56.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.12 [info     ] CQL_20220417145545: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00028560006406885634, 'time_algorithm_update': 0.03036944823857595, 'temp_loss': 4.968113907695522, 'temp': 0.9843831090532111, 'alpha_loss': -18.807028764803736, 'alpha': 1.0801510310031959, 'critic_loss': 37.76836189856896, 'actor_loss': -1.9524718113904873, 'time_step': 0.030738315638705823, 'td_error': 1.039278793305169, 'init_value': -1.598207712173462, 'ave_value': -0.8583360934807911} step=845
2022-04-17 14:56.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.18 [info     ] CQL_20220417145545: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00028142280127169817, 'time_algorithm_update': 0.030135374802809495, 'temp_loss': 4.949737320284871, 'temp': 0.9810025670824671, 'alpha_loss': -19.135505303828673, 'alpha': 1.0992129787185487, 'critic_loss': 44.11831141364645, 'actor_loss': -1.8505679326649953, 'time_step': 0.030492287415724535, 'td_error': 1.090116210185415, 'init_value': -1.6908531188964844, 'ave_value': -0.860064778565555} step=1014
2022-04-17 14:56.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145545/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:56.18 [info     ] FQE_20220417145618: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015277367133598822, 'time_algorithm_update': 0.004115052037424855, 'loss': 0.01073556849974897, 'time_step': 0.004337053794365425, 'init_value': -0.48908060789108276, 'ave_value': -0.45487966242137257, 'soft_opc': nan} step=77




2022-04-17 14:56.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.18 [info     ] FQE_20220417145618: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016559254039417613, 'time_algorithm_update': 0.004117748954079368, 'loss': 0.0076601420069572984, 'time_step': 0.004363338668625076, 'init_value': -0.5969580411911011, 'ave_value': -0.5141698043625634, 'soft_opc': nan} step=154




2022-04-17 14:56.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.19 [info     ] FQE_20220417145618: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015809938505098419, 'time_algorithm_update': 0.004172631672450474, 'loss': 0.0060918576676737175, 'time_step': 0.004414193041912921, 'init_value': -0.6748554706573486, 'ave_value': -0.5547114379771121, 'soft_opc': nan} step=231




2022-04-17 14:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.19 [info     ] FQE_20220417145618: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001673884206003957, 'time_algorithm_update': 0.004159360736995549, 'loss': 0.00507960278335233, 'time_step': 0.004404733707378437, 'init_value': -0.7356580495834351, 'ave_value': -0.5911457586395847, 'soft_opc': nan} step=308




2022-04-17 14:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.20 [info     ] FQE_20220417145618: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016931125095912388, 'time_algorithm_update': 0.004245581564965186, 'loss': 0.004650887316576652, 'time_step': 0.004501646215265448, 'init_value': -0.7431862354278564, 'ave_value': -0.5686725479391244, 'soft_opc': nan} step=385




2022-04-17 14:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.20 [info     ] FQE_20220417145618: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016917191542588272, 'time_algorithm_update': 0.004225625620259867, 'loss': 0.004519381605741846, 'time_step': 0.004513260606047395, 'init_value': -0.8394472002983093, 'ave_value': -0.6352446523604093, 'soft_opc': nan} step=462




2022-04-17 14:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.20 [info     ] FQE_20220417145618: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016334459379121856, 'time_algorithm_update': 0.004122359412057059, 'loss': 0.004372642535788866, 'time_step': 0.004361505632276659, 'init_value': -0.9171592593193054, 'ave_value': -0.6777892119712657, 'soft_opc': nan} step=539




2022-04-17 14:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.21 [info     ] FQE_20220417145618: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016718715816349178, 'time_algorithm_update': 0.0041019885570972, 'loss': 0.004288406900505161, 'time_step': 0.0043648001435515164, 'init_value': -0.9837837815284729, 'ave_value': -0.7033654294572435, 'soft_opc': nan} step=616




2022-04-17 14:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.21 [info     ] FQE_20220417145618: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001574027073847783, 'time_algorithm_update': 0.0039896345757818846, 'loss': 0.003988330941889193, 'time_step': 0.004228003613360516, 'init_value': -0.999047577381134, 'ave_value': -0.7132704445639172, 'soft_opc': nan} step=693




2022-04-17 14:56.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.22 [info     ] FQE_20220417145618: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016549036100313262, 'time_algorithm_update': 0.004282140112542487, 'loss': 0.004176964795870053, 'time_step': 0.00453854226446771, 'init_value': -1.0426722764968872, 'ave_value': -0.719910745456949, 'soft_opc': nan} step=770




2022-04-17 14:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145618/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:56.22 [info     ] FQE_20220417145622: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015514237540108816, 'time_algorithm_update': 0.00389851223338734, 'loss': 0.009273415225763599, 'time_step': 0.004128196022727273, 'init_value': -0.20410147309303284, 'ave_value': -0.134043627056117, 'soft_opc': nan} step=77




2022-04-17 14:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.22 [info     ] FQE_20220417145622: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00018303425281078784, 'time_algorithm_update': 0.004519026000778396, 'loss': 0.0057412266985259275, 'time_step': 0.004803741133058226, 'init_value': -0.26499754190444946, 'ave_value': -0.1639797213999135, 'soft_opc': nan} step=154




2022-04-17 14:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.23 [info     ] FQE_20220417145622: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017142915106438972, 'time_algorithm_update': 0.004030301973417208, 'loss': 0.004031972383608582, 'time_step': 0.004293367460176542, 'init_value': -0.27532804012298584, 'ave_value': -0.16167805723107612, 'soft_opc': nan} step=231




2022-04-17 14:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.23 [info     ] FQE_20220417145622: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00014938007701526988, 'time_algorithm_update': 0.0039754842782949475, 'loss': 0.003310758316611218, 'time_step': 0.004193442208426339, 'init_value': -0.2723093628883362, 'ave_value': -0.15936707803761369, 'soft_opc': nan} step=308




2022-04-17 14:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.24 [info     ] FQE_20220417145622: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016445927805714794, 'time_algorithm_update': 0.004075449782532531, 'loss': 0.003017531987582031, 'time_step': 0.004310703896856927, 'init_value': -0.2819266617298126, 'ave_value': -0.16264732383728564, 'soft_opc': nan} step=385




2022-04-17 14:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.24 [info     ] FQE_20220417145622: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001671128458790965, 'time_algorithm_update': 0.004076462287407417, 'loss': 0.0028241863398655474, 'time_step': 0.004322361636471438, 'init_value': -0.3121776282787323, 'ave_value': -0.1868692384831406, 'soft_opc': nan} step=462




2022-04-17 14:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.24 [info     ] FQE_20220417145622: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001721536958372438, 'time_algorithm_update': 0.004450763974870954, 'loss': 0.0026256690760348717, 'time_step': 0.00469624222099007, 'init_value': -0.33548104763031006, 'ave_value': -0.20139902538141688, 'soft_opc': nan} step=539




2022-04-17 14:56.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.25 [info     ] FQE_20220417145622: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017168305136940697, 'time_algorithm_update': 0.0044206798850715935, 'loss': 0.002320146554016641, 'time_step': 0.004677893279434799, 'init_value': -0.3491184711456299, 'ave_value': -0.20968183082704608, 'soft_opc': nan} step=616




2022-04-17 14:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.25 [info     ] FQE_20220417145622: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017023396182369875, 'time_algorithm_update': 0.004425203645384157, 'loss': 0.00209206843812188, 'time_step': 0.004660185281332437, 'init_value': -0.33950233459472656, 'ave_value': -0.20450749930229273, 'soft_opc': nan} step=693




2022-04-17 14:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:56.26 [info     ] FQE_20220417145622: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00018082655869521103, 'time_algorithm_update': 0.004594403427916688, 'loss': 0.002356114046642097, 'time_step': 0.004862803917426568, 'init_value': -0.3594649136066437, 'ave_value': -0.2176061858162955, 'soft_opc': nan} step=770




2022-04-17 14:56.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145622/model_770.pt
search iteration:  8
using hyper params:  [0.0066446624867211165, 0.0019610320160441015, 2.0005218905191153e-05, 5]
2022-04-17 14:56.26 [debug    ] RoundIterator is selected.
2022-04-17 14:56.26 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145626
2022-04-17 14:56.26 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:56.26 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:56.26 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145626/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0066446624867211165, 'actor_optim_factory': {'opti

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.31 [info     ] CQL_20220417145626: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00037189912513868346, 'time_algorithm_update': 0.03211362122078619, 'temp_loss': 4.888659344622369, 'temp': 0.9982459485178163, 'alpha_loss': -19.317534215351532, 'alpha': 1.0088074214359712, 'critic_loss': 123.81724724402794, 'actor_loss': -1.4322572774611986, 'time_step': 0.03256576865382448, 'td_error': 2.2846848115778045, 'init_value': -1.2917360067367554, 'ave_value': -0.8167146111689172} step=169
2022-04-17 14:56.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.37 [info     ] CQL_20220417145626: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00034927616457967366, 'time_algorithm_update': 0.03058737128443972, 'temp_loss': 4.915228987586569, 'temp': 0.9948473477504662, 'alpha_loss': -20.015589663262904, 'alpha': 1.0271099439034095, 'critic_loss': 73.81833375558345, 'actor_loss': 1.7321733249979612, 'time_step': 0.03101341117768598, 'td_error': 1.021125018946965, 'init_value': -6.251915454864502, 'ave_value': -5.294047313434584} step=338
2022-04-17 14:56.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.42 [info     ] CQL_20220417145626: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00034884023948534, 'time_algorithm_update': 0.03067363507648897, 'temp_loss': 4.940799078292395, 'temp': 0.991479403168492, 'alpha_loss': -19.14939879242485, 'alpha': 1.044217072295014, 'critic_loss': 85.3588078064326, 'actor_loss': 3.585542969449737, 'time_step': 0.031102770178981082, 'td_error': 1.0327047963681106, 'init_value': -8.633169174194336, 'ave_value': -7.291500937766857} step=507
2022-04-17 14:56.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.48 [info     ] CQL_20220417145626: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00034607091598962185, 'time_algorithm_update': 0.03099612653608153, 'temp_loss': 4.988450699303982, 'temp': 0.9880976666360212, 'alpha_loss': -19.54116179110736, 'alpha': 1.0617949398311637, 'critic_loss': 110.70813304291674, 'actor_loss': 5.132978420991164, 'time_step': 0.03141756311676206, 'td_error': 1.0458309635653076, 'init_value': -10.945154190063477, 'ave_value': -9.418600524008811} step=676
2022-04-17 14:56.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.53 [info     ] CQL_20220417145626: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003633231100951426, 'time_algorithm_update': 0.031037485811131946, 'temp_loss': 4.968729527038936, 'temp': 0.9847375969209614, 'alpha_loss': -19.810892048672105, 'alpha': 1.080030479374722, 'critic_loss': 142.6140477558565, 'actor_loss': 6.810781636887048, 'time_step': 0.03148559282517292, 'td_error': 1.1373160256356316, 'init_value': -12.616605758666992, 'ave_value': -11.012041463379388} step=845
2022-04-17 14:56.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:56.59 [info     ] CQL_20220417145626: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00035572757382364666, 'time_algorithm_update': 0.031063796500482504, 'temp_loss': 4.95555098099116, 'temp': 0.9814046141663952, 'alpha_loss': -19.990183858476446, 'alpha': 1.0987023312664597, 'critic_loss': 180.51819642196745, 'actor_loss': 8.127616470382058, 'time_step': 0.03150310883155236, 'td_error': 1.1042275035150018, 'init_value': -13.429641723632812, 'ave_value': -11.794340538334202} step=1014
2022-04-17 14:56.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145626/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.0000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:56.59 [info     ] FQE_20220417145659: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016222681318010603, 'time_algorithm_update': 0.004231298124635375, 'loss': 0.007648551207664725, 'time_step': 0.0044686856207909525, 'init_value': -0.359598845243454, 'ave_value': -0.2979421211859664, 'soft_opc': nan} step=77




2022-04-17 14:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.00 [info     ] FQE_20220417145659: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016136912556437703, 'time_algorithm_update': 0.004100353686840503, 'loss': 0.0051092571373303215, 'time_step': 0.004349965553779107, 'init_value': -0.46235960721969604, 'ave_value': -0.35265038794762377, 'soft_opc': nan} step=154




2022-04-17 14:57.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.00 [info     ] FQE_20220417145659: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015248880757913962, 'time_algorithm_update': 0.003631796155657087, 'loss': 0.0037651322116809233, 'time_step': 0.0038464750562395367, 'init_value': -0.5352809429168701, 'ave_value': -0.39338592020375235, 'soft_opc': nan} step=231




2022-04-17 14:57.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.00 [info     ] FQE_20220417145659: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016368519176136363, 'time_algorithm_update': 0.004019139649031998, 'loss': 0.0030293202499387327, 'time_step': 0.004263846905200513, 'init_value': -0.5744796395301819, 'ave_value': -0.429277205406814, 'soft_opc': nan} step=308




2022-04-17 14:57.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.01 [info     ] FQE_20220417145659: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015894468728598062, 'time_algorithm_update': 0.003866350495970094, 'loss': 0.002822104874732239, 'time_step': 0.0041018647032898745, 'init_value': -0.5748752951622009, 'ave_value': -0.40041219259167576, 'soft_opc': nan} step=385




2022-04-17 14:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.01 [info     ] FQE_20220417145659: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001748846722887708, 'time_algorithm_update': 0.004408885906269024, 'loss': 0.0027958362804749956, 'time_step': 0.00465361483685382, 'init_value': -0.6497275829315186, 'ave_value': -0.4399578927302951, 'soft_opc': nan} step=462




2022-04-17 14:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.01 [info     ] FQE_20220417145659: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017122169593711953, 'time_algorithm_update': 0.004270138678612647, 'loss': 0.0026609551996089422, 'time_step': 0.004517874160370269, 'init_value': -0.6975318789482117, 'ave_value': -0.45430387787118154, 'soft_opc': nan} step=539




2022-04-17 14:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.02 [info     ] FQE_20220417145659: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016731101197081728, 'time_algorithm_update': 0.004148706213220373, 'loss': 0.0026041252943747616, 'time_step': 0.004404294026362432, 'init_value': -0.7676424980163574, 'ave_value': -0.492070834080244, 'soft_opc': nan} step=616




2022-04-17 14:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.02 [info     ] FQE_20220417145659: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001677135368446251, 'time_algorithm_update': 0.004113825884732333, 'loss': 0.0024905464757088718, 'time_step': 0.004366286389239422, 'init_value': -0.7715631127357483, 'ave_value': -0.49032509730340124, 'soft_opc': nan} step=693




2022-04-17 14:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.03 [info     ] FQE_20220417145659: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001646976966362495, 'time_algorithm_update': 0.004445531151511453, 'loss': 0.002657140632365625, 'time_step': 0.0046712454263266034, 'init_value': -0.8358138799667358, 'ave_value': -0.5225084389786463, 'soft_opc': nan} step=770




2022-04-17 14:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145659/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:57.03 [info     ] FQE_20220417145703: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016383381633015422, 'time_algorithm_update': 0.0041481705455036905, 'loss': 0.010614742307210124, 'time_step': 0.004387663556383802, 'init_value': -0.11560989916324615, 'ave_value': -0.09293353084122409, 'soft_opc': nan} step=77




2022-04-17 14:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.03 [info     ] FQE_20220417145703: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00015716428880567675, 'time_algorithm_update': 0.0038859658427052683, 'loss': 0.006476803607150138, 'time_step': 0.004125790162519975, 'init_value': -0.1327536404132843, 'ave_value': -0.08914596510061004, 'soft_opc': nan} step=154




2022-04-17 14:57.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.04 [info     ] FQE_20220417145703: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017580738315334569, 'time_algorithm_update': 0.0044202680711622365, 'loss': 0.0046838631811128425, 'time_step': 0.004683116813758751, 'init_value': -0.16267704963684082, 'ave_value': -0.10595779704718708, 'soft_opc': nan} step=231




2022-04-17 14:57.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.04 [info     ] FQE_20220417145703: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.000166515251258751, 'time_algorithm_update': 0.004267906213735605, 'loss': 0.003827782375012319, 'time_step': 0.004526212617948458, 'init_value': -0.18128928542137146, 'ave_value': -0.12629094542180364, 'soft_opc': nan} step=308




2022-04-17 14:57.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.05 [info     ] FQE_20220417145703: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001756587585845551, 'time_algorithm_update': 0.004385728340644341, 'loss': 0.003286170767160592, 'time_step': 0.004656095009345513, 'init_value': -0.1627642810344696, 'ave_value': -0.10960501217711213, 'soft_opc': nan} step=385




2022-04-17 14:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.05 [info     ] FQE_20220417145703: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001622825473934025, 'time_algorithm_update': 0.004214255840747388, 'loss': 0.002904738405380737, 'time_step': 0.004443592839426808, 'init_value': -0.18899142742156982, 'ave_value': -0.1347608778374018, 'soft_opc': nan} step=462




2022-04-17 14:57.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.05 [info     ] FQE_20220417145703: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016183977003221388, 'time_algorithm_update': 0.004171238317118063, 'loss': 0.0025593967353864633, 'time_step': 0.00441178408536044, 'init_value': -0.21119359135627747, 'ave_value': -0.15660810043682924, 'soft_opc': nan} step=539




2022-04-17 14:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.06 [info     ] FQE_20220417145703: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016707259339171572, 'time_algorithm_update': 0.004091380478499771, 'loss': 0.002237946073246467, 'time_step': 0.004335908146647664, 'init_value': -0.24459856748580933, 'ave_value': -0.1924144972283561, 'soft_opc': nan} step=616




2022-04-17 14:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.06 [info     ] FQE_20220417145703: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015860408931583553, 'time_algorithm_update': 0.004168705506758256, 'loss': 0.0019377041600535732, 'time_step': 0.004408554597334428, 'init_value': -0.23645660281181335, 'ave_value': -0.19235125734798006, 'soft_opc': nan} step=693




2022-04-17 14:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.07 [info     ] FQE_20220417145703: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017493731015688413, 'time_algorithm_update': 0.004508554161369026, 'loss': 0.0019018440068905035, 'time_step': 0.0047739450033608966, 'init_value': -0.2505619525909424, 'ave_value': -0.207459960924881, 'soft_opc': nan} step=770




2022-04-17 14:57.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145703/model_770.pt
search iteration:  9
using hyper params:  [0.000717030101298408, 0.009667471731789562, 6.912770858658488e-05, 5]
2022-04-17 14:57.07 [debug    ] RoundIterator is selected.
2022-04-17 14:57.07 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145707
2022-04-17 14:57.07 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:57.07 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:57.07 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145707/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.000717030101298408, 'actor_optim_factory': {'optim_cl

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.12 [info     ] CQL_20220417145707: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00034000182292870515, 'time_algorithm_update': 0.030450142346895658, 'temp_loss': 4.1981042799865, 'temp': 0.993801550046932, 'alpha_loss': -18.776590211857, 'alpha': 1.0086697008482803, 'critic_loss': 76.38361406044142, 'actor_loss': 2.0984433018685094, 'time_step': 0.030872003566583938, 'td_error': 0.8948610484139061, 'init_value': -5.388497352600098, 'ave_value': -4.662250341971715} step=169
2022-04-17 14:57.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.18 [info     ] CQL_20220417145707: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003489023129615558, 'time_algorithm_update': 0.030520990755431045, 'temp_loss': 4.7057928672203655, 'temp': 0.9813175183781505, 'alpha_loss': -17.79806949683195, 'alpha': 1.0252409326959644, 'critic_loss': 81.62112984290489, 'actor_loss': 3.0538136056189003, 'time_step': 0.030954468179736617, 'td_error': 1.1460912907368317, 'init_value': -7.207576751708984, 'ave_value': -6.069282220580557} step=338
2022-04-17 14:57.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.23 [info     ] CQL_20220417145707: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003491590714313575, 'time_algorithm_update': 0.031438414161727275, 'temp_loss': 4.769362475039691, 'temp': 0.9692818480130483, 'alpha_loss': -18.173723254683456, 'alpha': 1.0423784940200445, 'critic_loss': 120.9064355884078, 'actor_loss': 3.8130435647343743, 'time_step': 0.03186843663277711, 'td_error': 1.16882877162189, 'init_value': -8.300946235656738, 'ave_value': -6.95793222304937} step=507
2022-04-17 14:57.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.28 [info     ] CQL_20220417145707: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00035020021291879506, 'time_algorithm_update': 0.030378527895233334, 'temp_loss': 4.7685046732073, 'temp': 0.9576057414331379, 'alpha_loss': -18.57599002228686, 'alpha': 1.0602535968701514, 'critic_loss': 183.7860662697335, 'actor_loss': 4.2089439905606785, 'time_step': 0.030804683470867088, 'td_error': 1.103978234689923, 'init_value': -8.385937690734863, 'ave_value': -7.075362979001827} step=676
2022-04-17 14:57.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.34 [info     ] CQL_20220417145707: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00035238830295540173, 'time_algorithm_update': 0.030945948595125998, 'temp_loss': 4.742010421301487, 'temp': 0.9462443657175323, 'alpha_loss': -18.859294609205257, 'alpha': 1.0786821411911554, 'critic_loss': 271.63459841034114, 'actor_loss': 3.987603451373309, 'time_step': 0.03137969406398796, 'td_error': 1.0537395706343513, 'init_value': -7.21973180770874, 'ave_value': -6.241295125033404} step=845
2022-04-17 14:57.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.39 [info     ] CQL_20220417145707: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003499716696654551, 'time_algorithm_update': 0.030654481176793928, 'temp_loss': 4.70225599108363, 'temp': 0.935148308968403, 'alpha_loss': -19.147455204168015, 'alpha': 1.097563907239564, 'critic_loss': 385.5467614168246, 'actor_loss': 3.1319641121745816, 'time_step': 0.031080851188072793, 'td_error': 1.0054155845978772, 'init_value': -5.698882102966309, 'ave_value': -5.003935461924956} step=1014
2022-04-17 14:57.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145707/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-0

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:57.40 [info     ] FQE_20220417145740: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015665958454082538, 'time_algorithm_update': 0.003935795325737495, 'loss': 0.011592360134780794, 'time_step': 0.004167934516807656, 'init_value': -0.14914891123771667, 'ave_value': -0.14391296337618753, 'soft_opc': nan} step=77




2022-04-17 14:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.40 [info     ] FQE_20220417145740: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00018082965504039419, 'time_algorithm_update': 0.004312087963153789, 'loss': 0.008763834289819389, 'time_step': 0.004574267895190747, 'init_value': -0.3116249442100525, 'ave_value': -0.2772611115899709, 'soft_opc': nan} step=154




2022-04-17 14:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.41 [info     ] FQE_20220417145740: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016450262688971184, 'time_algorithm_update': 0.003979704596779563, 'loss': 0.007163421489757958, 'time_step': 0.004236060303527039, 'init_value': -0.4057438373565674, 'ave_value': -0.3409086113510368, 'soft_opc': nan} step=231




2022-04-17 14:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.41 [info     ] FQE_20220417145740: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001636913844517299, 'time_algorithm_update': 0.004164017640150987, 'loss': 0.005605742169011917, 'time_step': 0.0044049783186479044, 'init_value': -0.5452725887298584, 'ave_value': -0.4839055822423852, 'soft_opc': nan} step=308




2022-04-17 14:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.41 [info     ] FQE_20220417145740: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001690047127859933, 'time_algorithm_update': 0.00403258397981718, 'loss': 0.0053618116540132795, 'time_step': 0.004286484284834428, 'init_value': -0.49803346395492554, 'ave_value': -0.40396647690921217, 'soft_opc': nan} step=385




2022-04-17 14:57.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.42 [info     ] FQE_20220417145740: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016948464628937956, 'time_algorithm_update': 0.003912470557472922, 'loss': 0.005363173940061749, 'time_step': 0.00416275742766145, 'init_value': -0.5940610766410828, 'ave_value': -0.4693230048468051, 'soft_opc': nan} step=462




2022-04-17 14:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.42 [info     ] FQE_20220417145740: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016293587622704444, 'time_algorithm_update': 0.0040388819459196805, 'loss': 0.004933879251120153, 'time_step': 0.004297888124143922, 'init_value': -0.6556713581085205, 'ave_value': -0.5106521824248941, 'soft_opc': nan} step=539




2022-04-17 14:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.43 [info     ] FQE_20220417145740: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017761255239511466, 'time_algorithm_update': 0.004345828836614435, 'loss': 0.0044740192447002836, 'time_step': 0.004637532419972606, 'init_value': -0.767186164855957, 'ave_value': -0.607286412099393, 'soft_opc': nan} step=616




2022-04-17 14:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.43 [info     ] FQE_20220417145740: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016845975603376116, 'time_algorithm_update': 0.004159196630700842, 'loss': 0.004023946643286905, 'time_step': 0.004413880311049424, 'init_value': -0.7589762210845947, 'ave_value': -0.5877517053144271, 'soft_opc': nan} step=693




2022-04-17 14:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.43 [info     ] FQE_20220417145740: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016093873358392097, 'time_algorithm_update': 0.004247495106288365, 'loss': 0.0042680686660256095, 'time_step': 0.004479572370454862, 'init_value': -0.8148214817047119, 'ave_value': -0.6235669963464544, 'soft_opc': nan} step=770




2022-04-17 14:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145740/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:57.44 [info     ] FQE_20220417145744: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00017883560874245384, 'time_algorithm_update': 0.004579618379667208, 'loss': 0.009759772508465624, 'time_step': 0.004842953248457475, 'init_value': -0.6326111555099487, 'ave_value': -0.6165687501162022, 'soft_opc': nan} step=77




2022-04-17 14:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.44 [info     ] FQE_20220417145744: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00015081678118024553, 'time_algorithm_update': 0.003761115012230811, 'loss': 0.006251365372717574, 'time_step': 0.003989408542583515, 'init_value': -0.6651951670646667, 'ave_value': -0.6326027486238394, 'soft_opc': nan} step=154




2022-04-17 14:57.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.45 [info     ] FQE_20220417145744: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016073747114701703, 'time_algorithm_update': 0.004206768878094562, 'loss': 0.0046989901698883865, 'time_step': 0.00442930320640663, 'init_value': -0.6802911162376404, 'ave_value': -0.6339999920076078, 'soft_opc': nan} step=231




2022-04-17 14:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.45 [info     ] FQE_20220417145744: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017199268588772069, 'time_algorithm_update': 0.004259211676461356, 'loss': 0.003740957575281719, 'time_step': 0.0045162795426009535, 'init_value': -0.6801283359527588, 'ave_value': -0.6383417671059703, 'soft_opc': nan} step=308




2022-04-17 14:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.45 [info     ] FQE_20220417145744: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001566441028149097, 'time_algorithm_update': 0.004076638779082856, 'loss': 0.00359588507532638, 'time_step': 0.00431012488030768, 'init_value': -0.6803278923034668, 'ave_value': -0.6534164805401553, 'soft_opc': nan} step=385




2022-04-17 14:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.46 [info     ] FQE_20220417145744: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00018471556824523134, 'time_algorithm_update': 0.004520258346161285, 'loss': 0.0032602805484013703, 'time_step': 0.004817399111661044, 'init_value': -0.6513347625732422, 'ave_value': -0.6318214891864373, 'soft_opc': nan} step=462




2022-04-17 14:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.46 [info     ] FQE_20220417145744: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.000158817737133472, 'time_algorithm_update': 0.004212017183179979, 'loss': 0.0030299802991114073, 'time_step': 0.004453767429698597, 'init_value': -0.6330125331878662, 'ave_value': -0.6309524593455298, 'soft_opc': nan} step=539




2022-04-17 14:57.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.47 [info     ] FQE_20220417145744: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016863315136401684, 'time_algorithm_update': 0.0044573591901110364, 'loss': 0.0025918192408567707, 'time_step': 0.004700385130845107, 'init_value': -0.6447063684463501, 'ave_value': -0.6505661356019544, 'soft_opc': nan} step=616




2022-04-17 14:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.47 [info     ] FQE_20220417145744: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015961969053590453, 'time_algorithm_update': 0.004082311283458363, 'loss': 0.002433488451627797, 'time_step': 0.004327269343586711, 'init_value': -0.6150896549224854, 'ave_value': -0.629624031799602, 'soft_opc': nan} step=693




2022-04-17 14:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:57.48 [info     ] FQE_20220417145744: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001720700945172991, 'time_algorithm_update': 0.004356158244145381, 'loss': 0.0024863389863439096, 'time_step': 0.004598493699903612, 'init_value': -0.6080725193023682, 'ave_value': -0.6407434296701943, 'soft_opc': nan} step=770




2022-04-17 14:57.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145744/model_770.pt
search iteration:  10
using hyper params:  [0.004243437033318438, 0.0030796914604841862, 6.614981787390904e-05, 3]
2022-04-17 14:57.48 [debug    ] RoundIterator is selected.
2022-04-17 14:57.48 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145748
2022-04-17 14:57.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:57.48 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:57.48 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145748/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004243437033318438, 'actor_optim_factory': {'optim_

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.53 [info     ] CQL_20220417145748: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00032977521772215354, 'time_algorithm_update': 0.031330873274944236, 'temp_loss': 4.777621087237928, 'temp': 0.9940809392364773, 'alpha_loss': -18.940909064027686, 'alpha': 1.0088363512027898, 'critic_loss': 60.375942275368956, 'actor_loss': -0.7358249441869337, 'time_step': 0.03173926031801122, 'td_error': 0.8549602567798005, 'init_value': -2.889810085296631, 'ave_value': -2.536912887300457} step=169
2022-04-17 14:57.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:57.58 [info     ] CQL_20220417145748: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00032843640570104474, 'time_algorithm_update': 0.03058099605628019, 'temp_loss': 4.91298863450451, 'temp': 0.9826788062880025, 'alpha_loss': -18.20006914533807, 'alpha': 1.0259886246461134, 'critic_loss': 48.17850817166842, 'actor_loss': 0.7653402809119789, 'time_step': 0.030996180144992804, 'td_error': 1.083441793542205, 'init_value': -4.305008411407471, 'ave_value': -3.653663363116416} step=338
2022-04-17 14:57.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.04 [info     ] CQL_20220417145748: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003137122949904944, 'time_algorithm_update': 0.03071535268478845, 'temp_loss': 4.895886412739048, 'temp': 0.9716488807159063, 'alpha_loss': -18.310757687811314, 'alpha': 1.0431228622176942, 'critic_loss': 63.77422540427665, 'actor_loss': 1.508662134937986, 'time_step': 0.031101140750230415, 'td_error': 1.0630418836962272, 'init_value': -5.472828388214111, 'ave_value': -4.655639891033774} step=507
2022-04-17 14:58.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.09 [info     ] CQL_20220417145748: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.000324281715077056, 'time_algorithm_update': 0.031230466605643548, 'temp_loss': 4.842169778586845, 'temp': 0.9608500017922306, 'alpha_loss': -18.6755432490061, 'alpha': 1.0608891474424735, 'critic_loss': 83.64506747454581, 'actor_loss': 2.3728077108338033, 'time_step': 0.03163297218683909, 'td_error': 1.1353299110779815, 'init_value': -6.887836456298828, 'ave_value': -5.917078912067521} step=676
2022-04-17 14:58.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.15 [info     ] CQL_20220417145748: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003194667883878629, 'time_algorithm_update': 0.03111342001243456, 'temp_loss': 4.734030142338319, 'temp': 0.9503102390723821, 'alpha_loss': -18.967501657248953, 'alpha': 1.0792448979157667, 'critic_loss': 110.02030840992222, 'actor_loss': 3.327643343682825, 'time_step': 0.03152204124179817, 'td_error': 1.2021514507045854, 'init_value': -7.796298980712891, 'ave_value': -6.549240668663034} step=845
2022-04-17 14:58.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.20 [info     ] CQL_20220417145748: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00031761587018797386, 'time_algorithm_update': 0.031434857633692276, 'temp_loss': 4.7382261259315985, 'temp': 0.9399637276604331, 'alpha_loss': -19.263783020380686, 'alpha': 1.0980811464715992, 'critic_loss': 142.5100045288808, 'actor_loss': 3.642280746493819, 'time_step': 0.031828423223551915, 'td_error': 1.1942510996667144, 'init_value': -8.642354965209961, 'ave_value': -7.3038697263034615} step=1014
2022-04-17 14:58.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145748/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:58.21 [info     ] FQE_20220417145821: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00014602673518193231, 'time_algorithm_update': 0.0037656821213759386, 'loss': 0.011210216185102215, 'time_step': 0.003983358284095665, 'init_value': -0.048795752227306366, 'ave_value': -0.010430316383707094, 'soft_opc': nan} step=77




2022-04-17 14:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.21 [info     ] FQE_20220417145821: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001796561402159852, 'time_algorithm_update': 0.004446215443796925, 'loss': 0.007165123267075071, 'time_step': 0.004710513275939149, 'init_value': -0.1518348902463913, 'ave_value': -0.07595193189157694, 'soft_opc': nan} step=154




2022-04-17 14:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.22 [info     ] FQE_20220417145821: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016896446029861253, 'time_algorithm_update': 0.00410987494827865, 'loss': 0.005507275933859411, 'time_step': 0.004354901128001027, 'init_value': -0.2600889801979065, 'ave_value': -0.15348977219011334, 'soft_opc': nan} step=231




2022-04-17 14:58.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.22 [info     ] FQE_20220417145821: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016542843409946987, 'time_algorithm_update': 0.004251195238782214, 'loss': 0.004533004067088296, 'time_step': 0.004497865577796837, 'init_value': -0.30002427101135254, 'ave_value': -0.1978096452818529, 'soft_opc': nan} step=308




2022-04-17 14:58.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.22 [info     ] FQE_20220417145821: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017029279238217837, 'time_algorithm_update': 0.00404657946004496, 'loss': 0.004131112132802025, 'time_step': 0.00430287943257914, 'init_value': -0.29917648434638977, 'ave_value': -0.17611554810475255, 'soft_opc': nan} step=385




2022-04-17 14:58.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.23 [info     ] FQE_20220417145821: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016692396882292512, 'time_algorithm_update': 0.004075926619690734, 'loss': 0.003971748921850865, 'time_step': 0.0043261329849045, 'init_value': -0.3558916449546814, 'ave_value': -0.20317936304134426, 'soft_opc': nan} step=462




2022-04-17 14:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.23 [info     ] FQE_20220417145821: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00015893230190524806, 'time_algorithm_update': 0.004103964025324041, 'loss': 0.0036372544739830806, 'time_step': 0.004331375097299551, 'init_value': -0.4111604690551758, 'ave_value': -0.23118322652991155, 'soft_opc': nan} step=539




2022-04-17 14:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.24 [info     ] FQE_20220417145821: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016681559674151531, 'time_algorithm_update': 0.004199272626406187, 'loss': 0.003381347537718036, 'time_step': 0.004448673941872336, 'init_value': -0.4663546085357666, 'ave_value': -0.2768030348997395, 'soft_opc': nan} step=616




2022-04-17 14:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.24 [info     ] FQE_20220417145821: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017054978903237875, 'time_algorithm_update': 0.004353928875613523, 'loss': 0.0031517876777797937, 'time_step': 0.004602850257576286, 'init_value': -0.47768688201904297, 'ave_value': -0.2761117955854347, 'soft_opc': nan} step=693




2022-04-17 14:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.24 [info     ] FQE_20220417145821: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00015666268088600853, 'time_algorithm_update': 0.004162992749895368, 'loss': 0.0031079840362555795, 'time_step': 0.0044070250028139584, 'init_value': -0.5163227915763855, 'ave_value': -0.2826335771865136, 'soft_opc': nan} step=770




2022-04-17 14:58.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145821/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:58.25 [info     ] FQE_20220417145824: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016706020801098316, 'time_algorithm_update': 0.004210837475665204, 'loss': 0.009377234149724245, 'time_step': 0.004468660850029487, 'init_value': -0.13290052115917206, 'ave_value': -0.10108956610196614, 'soft_opc': nan} step=77




2022-04-17 14:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.25 [info     ] FQE_20220417145824: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00015309259488985137, 'time_algorithm_update': 0.0038509492750291702, 'loss': 0.006448822827743632, 'time_step': 0.004074099776032683, 'init_value': -0.19383852183818817, 'ave_value': -0.13726941374810162, 'soft_opc': nan} step=154




2022-04-17 14:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.26 [info     ] FQE_20220417145824: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016651834760393416, 'time_algorithm_update': 0.004053292336402002, 'loss': 0.004706317144627502, 'time_step': 0.004303953864357688, 'init_value': -0.21478509902954102, 'ave_value': -0.14538767286114865, 'soft_opc': nan} step=231




2022-04-17 14:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.26 [info     ] FQE_20220417145824: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001723363802030489, 'time_algorithm_update': 0.004207555349771078, 'loss': 0.0036253506837823947, 'time_step': 0.004464697528195071, 'init_value': -0.2188187539577484, 'ave_value': -0.15339236407465226, 'soft_opc': nan} step=308




2022-04-17 14:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.26 [info     ] FQE_20220417145824: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016207818861131544, 'time_algorithm_update': 0.004144160778491528, 'loss': 0.00329298125247338, 'time_step': 0.004410387633682846, 'init_value': -0.219618558883667, 'ave_value': -0.16076079067346213, 'soft_opc': nan} step=385




2022-04-17 14:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.27 [info     ] FQE_20220417145824: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016486180293095575, 'time_algorithm_update': 0.004265342439923968, 'loss': 0.003016231515045677, 'time_step': 0.004514028499652813, 'init_value': -0.24969664216041565, 'ave_value': -0.18474408713681204, 'soft_opc': nan} step=462




2022-04-17 14:58.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.27 [info     ] FQE_20220417145824: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017123098497266894, 'time_algorithm_update': 0.004340190392035943, 'loss': 0.002721950977242419, 'time_step': 0.004600292676455015, 'init_value': -0.24385736882686615, 'ave_value': -0.17673425046844526, 'soft_opc': nan} step=539




2022-04-17 14:58.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.28 [info     ] FQE_20220417145824: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016081487977659547, 'time_algorithm_update': 0.0043153050657990695, 'loss': 0.002434825739375763, 'time_step': 0.004544762821940633, 'init_value': -0.25114932656288147, 'ave_value': -0.19171095730082408, 'soft_opc': nan} step=616




2022-04-17 14:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.28 [info     ] FQE_20220417145824: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001595422819063261, 'time_algorithm_update': 0.0038233453577215023, 'loss': 0.0021820623169096745, 'time_step': 0.004053571007468484, 'init_value': -0.24830514192581177, 'ave_value': -0.18731126812295065, 'soft_opc': nan} step=693




2022-04-17 14:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:58.28 [info     ] FQE_20220417145824: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00015334339884968547, 'time_algorithm_update': 0.003873478282581676, 'loss': 0.002396786425190119, 'time_step': 0.004108085260762797, 'init_value': -0.25942516326904297, 'ave_value': -0.19983553043692498, 'soft_opc': nan} step=770




2022-04-17 14:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145824/model_770.pt
search iteration:  11
using hyper params:  [0.008547500892348521, 0.0008729760701007767, 7.002012577385446e-05, 5]
2022-04-17 14:58.28 [debug    ] RoundIterator is selected.
2022-04-17 14:58.28 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145828
2022-04-17 14:58.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:58.28 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:58.28 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145828/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008547500892348521, 'actor_optim_factory': {'optim_

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.34 [info     ] CQL_20220417145828: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003525138606686564, 'time_algorithm_update': 0.03067872792306031, 'temp_loss': 4.900160003695968, 'temp': 0.9938401227166667, 'alpha_loss': -17.93402133467635, 'alpha': 1.0086688275873308, 'critic_loss': 136.77023103251258, 'actor_loss': -1.7968782853974392, 'time_step': 0.031111000557622965, 'td_error': 0.6502394070512566, 'init_value': -0.04299561679363251, 'ave_value': 0.09067339080383766} step=169
2022-04-17 14:58.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.39 [info     ] CQL_20220417145828: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003373862723627034, 'time_algorithm_update': 0.02968504725123298, 'temp_loss': 4.9077657835018, 'temp': 0.9819971524046723, 'alpha_loss': -19.765853780261157, 'alpha': 1.0266746705805763, 'critic_loss': 115.95460686316856, 'actor_loss': -0.8939204956763066, 'time_step': 0.03009786126176281, 'td_error': 1.2518022609678943, 'init_value': -2.231355667114258, 'ave_value': -1.7165777589429405} step=338
2022-04-17 14:58.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.44 [info     ] CQL_20220417145828: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003391469018699149, 'time_algorithm_update': 0.03088123840693186, 'temp_loss': 4.88144816449408, 'temp': 0.9704868014042194, 'alpha_loss': -19.29337008323895, 'alpha': 1.0449091633396035, 'critic_loss': 92.53729780750162, 'actor_loss': 0.8653883090650542, 'time_step': 0.03129630540249616, 'td_error': 0.9099628274327906, 'init_value': -5.207850456237793, 'ave_value': -4.423846837840639} step=507
2022-04-17 14:58.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.50 [info     ] CQL_20220417145828: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00033821862124832426, 'time_algorithm_update': 0.031219562835241917, 'temp_loss': 4.834818467585998, 'temp': 0.9591485656930144, 'alpha_loss': -18.988934014675884, 'alpha': 1.0627196625139586, 'critic_loss': 91.54233681943995, 'actor_loss': 2.576955171731802, 'time_step': 0.03162737287713226, 'td_error': 1.0928249055201003, 'init_value': -7.147473335266113, 'ave_value': -6.120570427469305} step=676
2022-04-17 14:58.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:58.56 [info     ] CQL_20220417145828: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00034163407320101583, 'time_algorithm_update': 0.031198416941264677, 'temp_loss': 4.782179826815453, 'temp': 0.948021795975386, 'alpha_loss': -19.097330646402032, 'alpha': 1.0807861766871616, 'critic_loss': 103.69029073320196, 'actor_loss': 4.090213712150528, 'time_step': 0.03162300234010234, 'td_error': 1.1076621730349725, 'init_value': -9.607667922973633, 'ave_value': -8.183198294242223} step=845
2022-04-17 14:58.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.01 [info     ] CQL_20220417145828: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003466704893394335, 'time_algorithm_update': 0.03010297950202897, 'temp_loss': 4.7287072514641215, 'temp': 0.93708518024027, 'alpha_loss': -19.369902943718362, 'alpha': 1.099347611150798, 'critic_loss': 122.21680256318764, 'actor_loss': 5.660121827435916, 'time_step': 0.030528175760303023, 'td_error': 1.2017889778848627, 'init_value': -11.331098556518555, 'ave_value': -9.738673495365692} step=1014
2022-04-17 14:59.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145828/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:59.01 [info     ] FQE_20220417145901: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001607498565277496, 'time_algorithm_update': 0.004109670589496563, 'loss': 0.012448640667240728, 'time_step': 0.004355108583128297, 'init_value': -0.39328789710998535, 'ave_value': -0.35924540290961393, 'soft_opc': nan} step=77




2022-04-17 14:59.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.02 [info     ] FQE_20220417145901: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00015888276038231788, 'time_algorithm_update': 0.0038806401289902723, 'loss': 0.00858607128649563, 'time_step': 0.00410930212441977, 'init_value': -0.5333283543586731, 'ave_value': -0.46684245330256385, 'soft_opc': nan} step=154




2022-04-17 14:59.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.02 [info     ] FQE_20220417145901: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015944939155083197, 'time_algorithm_update': 0.0038908487790590756, 'loss': 0.006853723305870186, 'time_step': 0.004123833272364232, 'init_value': -0.6052134037017822, 'ave_value': -0.5146660252465858, 'soft_opc': nan} step=231




2022-04-17 14:59.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.02 [info     ] FQE_20220417145901: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.000161301005970348, 'time_algorithm_update': 0.004142197695645419, 'loss': 0.005337100615120167, 'time_step': 0.004384759184602019, 'init_value': -0.7141352891921997, 'ave_value': -0.6124186412305446, 'soft_opc': nan} step=308




2022-04-17 14:59.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.03 [info     ] FQE_20220417145901: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015990765063793628, 'time_algorithm_update': 0.003929082449380453, 'loss': 0.004985286279690343, 'time_step': 0.004174932256921545, 'init_value': -0.6766952276229858, 'ave_value': -0.546831284557377, 'soft_opc': nan} step=385




2022-04-17 14:59.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.03 [info     ] FQE_20220417145901: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016811606171843294, 'time_algorithm_update': 0.004149966425709911, 'loss': 0.004734928834602818, 'time_step': 0.004417094317349521, 'init_value': -0.751701295375824, 'ave_value': -0.5945190834972236, 'soft_opc': nan} step=462




2022-04-17 14:59.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.04 [info     ] FQE_20220417145901: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001535539503221388, 'time_algorithm_update': 0.003906702066396738, 'loss': 0.004379072658983725, 'time_step': 0.004134323689844701, 'init_value': -0.7721802592277527, 'ave_value': -0.5942281085494402, 'soft_opc': nan} step=539




2022-04-17 14:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.04 [info     ] FQE_20220417145901: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016471317836216519, 'time_algorithm_update': 0.004131889962530755, 'loss': 0.004015961843361328, 'time_step': 0.004386146347244065, 'init_value': -0.8060303330421448, 'ave_value': -0.6135567300491505, 'soft_opc': nan} step=616




2022-04-17 14:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.04 [info     ] FQE_20220417145901: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017258099147251675, 'time_algorithm_update': 0.004278892046445376, 'loss': 0.0038262811586164034, 'time_step': 0.004530891195520178, 'init_value': -0.7859908938407898, 'ave_value': -0.5889490010032246, 'soft_opc': nan} step=693




2022-04-17 14:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.05 [info     ] FQE_20220417145901: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016567923805930397, 'time_algorithm_update': 0.004313224321836001, 'loss': 0.003860246680386655, 'time_step': 0.004568886447262454, 'init_value': -0.8561015725135803, 'ave_value': -0.6408240009602663, 'soft_opc': nan} step=770




2022-04-17 14:59.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145901/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:59.05 [info     ] FQE_20220417145905: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00018469389382894937, 'time_algorithm_update': 0.004418212097960633, 'loss': 0.0024741774306983335, 'time_step': 0.0047081538609095985, 'init_value': -0.0019351579248905182, 'ave_value': 0.01889292297230379, 'soft_opc': nan} step=77




2022-04-17 14:59.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.06 [info     ] FQE_20220417145905: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017422205441957944, 'time_algorithm_update': 0.0042457332858791596, 'loss': 0.0008817420782545558, 'time_step': 0.004508684207866718, 'init_value': -0.0594012588262558, 'ave_value': -0.013446852380225251, 'soft_opc': nan} step=154




2022-04-17 14:59.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.06 [info     ] FQE_20220417145905: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016232279988078328, 'time_algorithm_update': 0.0043743585611318615, 'loss': 0.0007660989420920876, 'time_step': 0.004613204435868697, 'init_value': -0.1136551946401596, 'ave_value': -0.052463266058816564, 'soft_opc': nan} step=231




2022-04-17 14:59.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.06 [info     ] FQE_20220417145905: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017654431330693231, 'time_algorithm_update': 0.00440731296291599, 'loss': 0.0006409980428129943, 'time_step': 0.004658962224985098, 'init_value': -0.13292361795902252, 'ave_value': -0.06145310037442156, 'soft_opc': nan} step=308




2022-04-17 14:59.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.07 [info     ] FQE_20220417145905: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016617155694342278, 'time_algorithm_update': 0.004158955115776557, 'loss': 0.0005968130660823897, 'time_step': 0.004414248776126218, 'init_value': -0.15892332792282104, 'ave_value': -0.07863968572336005, 'soft_opc': nan} step=385




2022-04-17 14:59.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.07 [info     ] FQE_20220417145905: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00015757919906021713, 'time_algorithm_update': 0.0040118167926738784, 'loss': 0.0005869923483401948, 'time_step': 0.004258967065191888, 'init_value': -0.2048627734184265, 'ave_value': -0.11358539305316905, 'soft_opc': nan} step=462




2022-04-17 14:59.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.08 [info     ] FQE_20220417145905: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017344796812379516, 'time_algorithm_update': 0.004303148814610073, 'loss': 0.0005556598227084071, 'time_step': 0.004562480109078544, 'init_value': -0.24024181067943573, 'ave_value': -0.13710021665739314, 'soft_opc': nan} step=539




2022-04-17 14:59.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.08 [info     ] FQE_20220417145905: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016539127795727222, 'time_algorithm_update': 0.004210020040536856, 'loss': 0.0005306572252325108, 'time_step': 0.004443277012218128, 'init_value': -0.2747355103492737, 'ave_value': -0.15840769897441606, 'soft_opc': nan} step=616




2022-04-17 14:59.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.08 [info     ] FQE_20220417145905: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015610843509822697, 'time_algorithm_update': 0.003855256291178914, 'loss': 0.00045429520889536125, 'time_step': 0.004098334869781098, 'init_value': -0.2849985659122467, 'ave_value': -0.1660719792551554, 'soft_opc': nan} step=693




2022-04-17 14:59.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.09 [info     ] FQE_20220417145905: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016565446729783888, 'time_algorithm_update': 0.004152195794241769, 'loss': 0.0005508068856003078, 'time_step': 0.004397804086858576, 'init_value': -0.32237017154693604, 'ave_value': -0.19137392814631934, 'soft_opc': nan} step=770




2022-04-17 14:59.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145905/model_770.pt
search iteration:  12
using hyper params:  [0.003923832417721357, 0.001981820685149067, 3.561894198027083e-05, 3]
2022-04-17 14:59.09 [debug    ] RoundIterator is selected.
2022-04-17 14:59.09 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145909
2022-04-17 14:59.09 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:59.09 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:59.09 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145909/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.003923832417721357, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.14 [info     ] CQL_20220417145909: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00032652905706823224, 'time_algorithm_update': 0.030189048609084633, 'temp_loss': 4.820217631977691, 'temp': 0.9968181852052903, 'alpha_loss': -18.182465728218034, 'alpha': 1.008742923567281, 'critic_loss': 64.8875814133142, 'actor_loss': -1.2138859462103195, 'time_step': 0.030588767937654575, 'td_error': 0.8466544649135721, 'init_value': -2.38834547996521, 'ave_value': -1.8942607859426455} step=169
2022-04-17 14:59.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.20 [info     ] CQL_20220417145909: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00032765766572670117, 'time_algorithm_update': 0.0322117438683143, 'temp_loss': 4.997605021888687, 'temp': 0.990617307685536, 'alpha_loss': -18.319282723601752, 'alpha': 1.026164134578592, 'critic_loss': 46.04735671274761, 'actor_loss': 0.10128476745145913, 'time_step': 0.032611846923828125, 'td_error': 1.0337599468760406, 'init_value': -4.250754356384277, 'ave_value': -3.2846257613275434} step=338
2022-04-17 14:59.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.25 [info     ] CQL_20220417145909: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.0003334304990147698, 'time_algorithm_update': 0.031851839031693495, 'temp_loss': 4.966659777263213, 'temp': 0.9845983544750326, 'alpha_loss': -18.48910956975271, 'alpha': 1.0437634626083825, 'critic_loss': 57.43115234375, 'actor_loss': 1.0283269205036953, 'time_step': 0.032271558716452334, 'td_error': 1.1423240200622253, 'init_value': -5.436224460601807, 'ave_value': -4.327971516932185} step=507
2022-04-17 14:59.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.31 [info     ] CQL_20220417145909: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00032663486412996373, 'time_algorithm_update': 0.03170543591651691, 'temp_loss': 4.937724339186087, 'temp': 0.9786756839272539, 'alpha_loss': -18.656872168095155, 'alpha': 1.0617244427020733, 'critic_loss': 72.7821828176284, 'actor_loss': 1.9156182806872757, 'time_step': 0.032109953242646164, 'td_error': 1.1210760871337269, 'init_value': -6.878053188323975, 'ave_value': -5.6076281857490535} step=676
2022-04-17 14:59.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.36 [info     ] CQL_20220417145909: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00032039930129192285, 'time_algorithm_update': 0.030302696679470807, 'temp_loss': 4.909707486982176, 'temp': 0.9728205278074953, 'alpha_loss': -19.006874784210023, 'alpha': 1.0801795770430707, 'critic_loss': 92.71287843602649, 'actor_loss': 2.8875809816213756, 'time_step': 0.030703186283450156, 'td_error': 1.0922298586193242, 'init_value': -8.250833511352539, 'ave_value': -6.768242860801585} step=845
2022-04-17 14:59.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.42 [info     ] CQL_20220417145909: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003255048447106717, 'time_algorithm_update': 0.03166888169282992, 'temp_loss': 4.879546803130201, 'temp': 0.9670247216901836, 'alpha_loss': -19.330582060052095, 'alpha': 1.0991313986524323, 'critic_loss': 117.72424329949554, 'actor_loss': 3.696390613296328, 'time_step': 0.032068143934893184, 'td_error': 1.2120453808664273, 'init_value': -9.085209846496582, 'ave_value': -7.56425399512858} step=1014
2022-04-17 14:59.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145909/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:59.43 [info     ] FQE_20220417145942: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.00016843730753118342, 'time_algorithm_update': 0.004208358851346103, 'loss': 0.01101663009665737, 'time_step': 0.004454669627276334, 'init_value': -0.5148627758026123, 'ave_value': -0.48083591181952673, 'soft_opc': nan} step=88




2022-04-17 14:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.43 [info     ] FQE_20220417145942: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.00015836141326210716, 'time_algorithm_update': 0.003923852335322987, 'loss': 0.008046664122957736, 'time_step': 0.004160385240208019, 'init_value': -0.6360674500465393, 'ave_value': -0.5529446857201087, 'soft_opc': nan} step=176




2022-04-17 14:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.43 [info     ] FQE_20220417145942: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.0001806806434284557, 'time_algorithm_update': 0.0043517269871451636, 'loss': 0.006061680043454875, 'time_step': 0.004642269828102805, 'init_value': -0.7164139747619629, 'ave_value': -0.5878264946443541, 'soft_opc': nan} step=264




2022-04-17 14:59.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.44 [info     ] FQE_20220417145942: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00016398592428727582, 'time_algorithm_update': 0.004127461801875721, 'loss': 0.0051665976881684565, 'time_step': 0.004363861950961026, 'init_value': -0.8183311820030212, 'ave_value': -0.6584947373415972, 'soft_opc': nan} step=352




2022-04-17 14:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.44 [info     ] FQE_20220417145942: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.00017276406288146973, 'time_algorithm_update': 0.004341252825476907, 'loss': 0.004637917342320593, 'time_step': 0.004593063484538685, 'init_value': -0.8850728869438171, 'ave_value': -0.6925292454055838, 'soft_opc': nan} step=440




2022-04-17 14:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.45 [info     ] FQE_20220417145942: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.00017094883051785556, 'time_algorithm_update': 0.004203769293698398, 'loss': 0.0043576797417534344, 'time_step': 0.004447419535030018, 'init_value': -0.9260687232017517, 'ave_value': -0.699802474744685, 'soft_opc': nan} step=528




2022-04-17 14:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.45 [info     ] FQE_20220417145942: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.00017203526063398883, 'time_algorithm_update': 0.004203138026324185, 'loss': 0.004260502401104366, 'time_step': 0.004451591860164295, 'init_value': -0.9858177900314331, 'ave_value': -0.7335445630120802, 'soft_opc': nan} step=616




2022-04-17 14:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.46 [info     ] FQE_20220417145942: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00017988139932805842, 'time_algorithm_update': 0.00470374118198048, 'loss': 0.004143417289014906, 'time_step': 0.004959125410426746, 'init_value': -1.0700597763061523, 'ave_value': -0.8028955629011533, 'soft_opc': nan} step=704




2022-04-17 14:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.46 [info     ] FQE_20220417145942: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.00016320293599909002, 'time_algorithm_update': 0.00448194146156311, 'loss': 0.00404035305572589, 'time_step': 0.00472408804026517, 'init_value': -1.0821083784103394, 'ave_value': -0.790982354277963, 'soft_opc': nan} step=792




2022-04-17 14:59.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 14:59.47 [info     ] FQE_20220417145942: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.00018423524769869718, 'time_algorithm_update': 0.00448912111195651, 'loss': 0.003989272052422166, 'time_step': 0.004783156243237582, 'init_value': -1.1367719173431396, 'ave_value': -0.827884822466352, 'soft_opc': nan} step=880




2022-04-17 14:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145942/model_880.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 14:59.47 [info     ] FQE_20220417145947: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001602018034303343, 'time_algorithm_update': 0.0038368949642429103, 'loss': 0.0012463065534799856, 'time_step': 0.004069901131964349, 'init_value': -0.2514464259147644, 'ave_value': -0.22195502437718279, 'soft_opc': nan} step=77




2022-04-17 14:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.47 [info     ] FQE_20220417145947: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017106068598759638, 'time_algorithm_update': 0.004303628748113459, 'loss': 0.0008367827999732131, 'time_step': 0.004548723047429865, 'init_value': -0.2815752923488617, 'ave_value': -0.2331719994142249, 'soft_opc': nan} step=154




2022-04-17 14:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.48 [info     ] FQE_20220417145947: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017313523726029828, 'time_algorithm_update': 0.004272151302981687, 'loss': 0.0006745167291275674, 'time_step': 0.004512093283913352, 'init_value': -0.32459843158721924, 'ave_value': -0.26039718827148817, 'soft_opc': nan} step=231




2022-04-17 14:59.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.48 [info     ] FQE_20220417145947: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017278535025460378, 'time_algorithm_update': 0.004182756721199333, 'loss': 0.0006128885326287165, 'time_step': 0.004446017277705205, 'init_value': -0.3538874387741089, 'ave_value': -0.2787127561598748, 'soft_opc': nan} step=308




2022-04-17 14:59.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.49 [info     ] FQE_20220417145947: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001659145602932224, 'time_algorithm_update': 0.004196563324370942, 'loss': 0.0005906310260171894, 'time_step': 0.004433715498292601, 'init_value': -0.3852195143699646, 'ave_value': -0.30516712901828524, 'soft_opc': nan} step=385




2022-04-17 14:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.49 [info     ] FQE_20220417145947: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016721502527014003, 'time_algorithm_update': 0.004291419859056349, 'loss': 0.0006152373277839249, 'time_step': 0.0045374833144150774, 'init_value': -0.41339612007141113, 'ave_value': -0.3184733783876574, 'soft_opc': nan} step=462




2022-04-17 14:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.49 [info     ] FQE_20220417145947: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016549655369349888, 'time_algorithm_update': 0.00427313284440474, 'loss': 0.0005480611295055283, 'time_step': 0.004521741495504008, 'init_value': -0.44039902091026306, 'ave_value': -0.3357426414215887, 'soft_opc': nan} step=539




2022-04-17 14:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.50 [info     ] FQE_20220417145947: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017613869208794135, 'time_algorithm_update': 0.004517713150420747, 'loss': 0.000529948397284047, 'time_step': 0.00476912089756557, 'init_value': -0.4760790765285492, 'ave_value': -0.3571600026509783, 'soft_opc': nan} step=616




2022-04-17 14:59.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.50 [info     ] FQE_20220417145947: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017421276538403003, 'time_algorithm_update': 0.004312428561123935, 'loss': 0.0005030829608538671, 'time_step': 0.004576732585956524, 'init_value': -0.481110543012619, 'ave_value': -0.36049274482705573, 'soft_opc': nan} step=693




2022-04-17 14:59.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 14:59.51 [info     ] FQE_20220417145947: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016645951704545453, 'time_algorithm_update': 0.004093427162665825, 'loss': 0.0006216897698370256, 'time_step': 0.004348603161898526, 'init_value': -0.5039339661598206, 'ave_value': -0.37348590517366254, 'soft_opc': nan} step=770




2022-04-17 14:59.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417145947/model_770.pt
search iteration:  13
using hyper params:  [0.007729477134162164, 0.006944197948903637, 2.7179603238591002e-05, 5]
2022-04-17 14:59.51 [debug    ] RoundIterator is selected.
2022-04-17 14:59.51 [info     ] Directory is created at d3rlpy_logs/CQL_20220417145951
2022-04-17 14:59.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:59.51 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:59.51 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417145951/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.007729477134162164, 'actor_optim_factory': {'optim_

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 14:59.56 [info     ] CQL_20220417145951: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003554905660053682, 'time_algorithm_update': 0.031232708304591433, 'temp_loss': 4.864625413036911, 'temp': 0.9975989637290232, 'alpha_loss': -19.243474215445435, 'alpha': 1.0087796259208544, 'critic_loss': 80.21891545402933, 'actor_loss': 0.9551449129743689, 'time_step': 0.03166822427828636, 'td_error': 1.0363927299207598, 'init_value': -5.865896224975586, 'ave_value': -5.097538674618747} step=169
2022-04-17 14:59.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.02 [info     ] CQL_20220417145951: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00035636523771568165, 'time_algorithm_update': 0.031517666472485786, 'temp_loss': 5.002442213205191, 'temp': 0.9929054922606113, 'alpha_loss': -18.140344202165775, 'alpha': 1.025365825235491, 'critic_loss': 76.31640706259823, 'actor_loss': 2.9464306337593573, 'time_step': 0.03196451649863339, 'td_error': 1.155112264310251, 'init_value': -8.057859420776367, 'ave_value': -6.937878558388701} step=338
2022-04-17 15:00.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.08 [info     ] CQL_20220417145951: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00036644089151416305, 'time_algorithm_update': 0.032661586118167676, 'temp_loss': 4.981275104206695, 'temp': 0.9883107807509293, 'alpha_loss': -18.274223474355843, 'alpha': 1.0423586100516236, 'critic_loss': 111.10386933095356, 'actor_loss': 3.9183998658106876, 'time_step': 0.03310805100661058, 'td_error': 1.1371047368516998, 'init_value': -8.62890911102295, 'ave_value': -7.477612235041352} step=507
2022-04-17 15:00.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.13 [info     ] CQL_20220417145951: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00035386395877634987, 'time_algorithm_update': 0.03153701364641359, 'temp_loss': 4.962955599000468, 'temp': 0.9837696531115199, 'alpha_loss': -18.55203373474482, 'alpha': 1.0599915910754683, 'critic_loss': 159.15669300147061, 'actor_loss': 4.732377595450046, 'time_step': 0.03196854845306577, 'td_error': 1.1781893941019657, 'init_value': -9.869004249572754, 'ave_value': -8.399662111299532} step=676
2022-04-17 15:00.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.19 [info     ] CQL_20220417145951: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003407311862742407, 'time_algorithm_update': 0.031013682043764014, 'temp_loss': 4.941821270440457, 'temp': 0.9792679241423071, 'alpha_loss': -18.85669682293954, 'alpha': 1.078201080215048, 'critic_loss': 226.37206381171413, 'actor_loss': 5.1685133290714065, 'time_step': 0.031437762390227005, 'td_error': 1.1799950504884933, 'init_value': -9.655534744262695, 'ave_value': -8.3448042030807} step=845
2022-04-17 15:00.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.24 [info     ] CQL_20220417145951: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00036379148268840723, 'time_algorithm_update': 0.031010971972222863, 'temp_loss': 4.919411458912686, 'temp': 0.9748017999547474, 'alpha_loss': -19.18128185159356, 'alpha': 1.0969489065147715, 'critic_loss': 312.138035520294, 'actor_loss': 4.983914739281468, 'time_step': 0.03145118295793703, 'td_error': 1.186420825302604, 'init_value': -8.872112274169922, 'ave_value': -7.77355703703992} step=1014
2022-04-17 15:00.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417145951/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01 

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:00.25 [info     ] FQE_20220417150024: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016950322436047838, 'time_algorithm_update': 0.004284437600668375, 'loss': 0.012103514407168735, 'time_step': 0.004530767341712852, 'init_value': -0.6516808867454529, 'ave_value': -0.6389271030286411, 'soft_opc': nan} step=77




2022-04-17 15:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.25 [info     ] FQE_20220417150024: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00018423873108702822, 'time_algorithm_update': 0.004711309036651215, 'loss': 0.008153242627521614, 'time_step': 0.004995717630757914, 'init_value': -0.7651709318161011, 'ave_value': -0.7258330433218328, 'soft_opc': nan} step=154




2022-04-17 15:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.25 [info     ] FQE_20220417150024: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017237353634524656, 'time_algorithm_update': 0.0041756320309329345, 'loss': 0.006528220713961047, 'time_step': 0.004436718953120244, 'init_value': -0.8226829767227173, 'ave_value': -0.7625397732800192, 'soft_opc': nan} step=231




2022-04-17 15:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.26 [info     ] FQE_20220417150024: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001591180826162363, 'time_algorithm_update': 0.004110243413355443, 'loss': 0.005478860725733367, 'time_step': 0.00433676273791821, 'init_value': -0.8877694010734558, 'ave_value': -0.8180311865097768, 'soft_opc': nan} step=308




2022-04-17 15:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.26 [info     ] FQE_20220417150024: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001698964601987368, 'time_algorithm_update': 0.004411198876120828, 'loss': 0.0051506186514947705, 'time_step': 0.004655999022644836, 'init_value': -0.8700024485588074, 'ave_value': -0.7817284360930726, 'soft_opc': nan} step=385




2022-04-17 15:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.27 [info     ] FQE_20220417150024: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00017399292487602728, 'time_algorithm_update': 0.00408957221291282, 'loss': 0.0049232580826025114, 'time_step': 0.004331666153746766, 'init_value': -0.9255155324935913, 'ave_value': -0.8171503887117446, 'soft_opc': nan} step=462




2022-04-17 15:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.27 [info     ] FQE_20220417150024: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00018564756814535562, 'time_algorithm_update': 0.0045735495431082586, 'loss': 0.004705126681546499, 'time_step': 0.004862082468998896, 'init_value': -0.9452602863311768, 'ave_value': -0.8125426724955842, 'soft_opc': nan} step=539




2022-04-17 15:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.27 [info     ] FQE_20220417150024: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016106258739124644, 'time_algorithm_update': 0.004173439818543273, 'loss': 0.00440963357384619, 'time_step': 0.004412694410844283, 'init_value': -1.0072152614593506, 'ave_value': -0.870186513769734, 'soft_opc': nan} step=616




2022-04-17 15:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.28 [info     ] FQE_20220417150024: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016012749114593903, 'time_algorithm_update': 0.0043415125314291425, 'loss': 0.004193489811709756, 'time_step': 0.004564669225123021, 'init_value': -0.9913648962974548, 'ave_value': -0.8554403278339017, 'soft_opc': nan} step=693




2022-04-17 15:00.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.28 [info     ] FQE_20220417150024: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017640497777369116, 'time_algorithm_update': 0.00447318461034205, 'loss': 0.0041614907442942845, 'time_step': 0.004732680011105227, 'init_value': -1.0317623615264893, 'ave_value': -0.8722518895123456, 'soft_opc': nan} step=770




2022-04-17 15:00.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150024/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:00.29 [info     ] FQE_20220417150028: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015775569073565594, 'time_algorithm_update': 0.00403279143494445, 'loss': 0.010326005306772211, 'time_step': 0.004270838452624036, 'init_value': 0.27813199162483215, 'ave_value': 0.27844730623670527, 'soft_opc': nan} step=77




2022-04-17 15:00.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.29 [info     ] FQE_20220417150028: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001628244078004515, 'time_algorithm_update': 0.004036225281752549, 'loss': 0.006353757785657397, 'time_step': 0.004280183222386744, 'init_value': 0.21851015090942383, 'ave_value': 0.24912508081745457, 'soft_opc': nan} step=154




2022-04-17 15:00.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.30 [info     ] FQE_20220417150028: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016912547024813566, 'time_algorithm_update': 0.004117581751439479, 'loss': 0.00441947268539822, 'time_step': 0.004366187306193562, 'init_value': 0.21971620619297028, 'ave_value': 0.26171410320041416, 'soft_opc': nan} step=231




2022-04-17 15:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.30 [info     ] FQE_20220417150028: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016315571673504719, 'time_algorithm_update': 0.004103236384206004, 'loss': 0.0034720054540371935, 'time_step': 0.004350566244744635, 'init_value': 0.20165124535560608, 'ave_value': 0.23444311696666856, 'soft_opc': nan} step=308




2022-04-17 15:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.30 [info     ] FQE_20220417150028: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00014546939304896763, 'time_algorithm_update': 0.003839050020490374, 'loss': 0.0029893995069160866, 'time_step': 0.004052979605538505, 'init_value': 0.2188793420791626, 'ave_value': 0.24853454363775682, 'soft_opc': nan} step=385




2022-04-17 15:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.31 [info     ] FQE_20220417150028: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016276248089678875, 'time_algorithm_update': 0.004082915070769075, 'loss': 0.002693061577760941, 'time_step': 0.0043157695175765395, 'init_value': 0.19976025819778442, 'ave_value': 0.22381430958976617, 'soft_opc': nan} step=462




2022-04-17 15:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.31 [info     ] FQE_20220417150028: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017058384882939325, 'time_algorithm_update': 0.004493038375656326, 'loss': 0.0023620449627943145, 'time_step': 0.004748071943010602, 'init_value': 0.19334019720554352, 'ave_value': 0.21484698642145944, 'soft_opc': nan} step=539




2022-04-17 15:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.31 [info     ] FQE_20220417150028: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017363065248960025, 'time_algorithm_update': 0.0043617099910587456, 'loss': 0.0021646194478992806, 'time_step': 0.004619006986741896, 'init_value': 0.14697930216789246, 'ave_value': 0.16837057553500206, 'soft_opc': nan} step=616




2022-04-17 15:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.32 [info     ] FQE_20220417150028: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001602513449532645, 'time_algorithm_update': 0.0040352654147457765, 'loss': 0.0019506671475004646, 'time_step': 0.004296584562821822, 'init_value': 0.15372216701507568, 'ave_value': 0.17032036074512713, 'soft_opc': nan} step=693




2022-04-17 15:00.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:00.32 [info     ] FQE_20220417150028: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016352108546665737, 'time_algorithm_update': 0.004200631921941584, 'loss': 0.0022664893903866314, 'time_step': 0.00443330058803806, 'init_value': 0.14569297432899475, 'ave_value': 0.15566754535444685, 'soft_opc': nan} step=770




2022-04-17 15:00.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150028/model_770.pt
search iteration:  14
using hyper params:  [0.009437598130974641, 0.003577915198686051, 7.42768135669248e-05, 5]
2022-04-17 15:00.32 [debug    ] RoundIterator is selected.
2022-04-17 15:00.32 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150032
2022-04-17 15:00.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:00.32 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:00.32 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150032/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.009437598130974641, 'actor_optim_factory': {'optim_cl

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.38 [info     ] CQL_20220417150032: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00036240752632095966, 'time_algorithm_update': 0.031415143661950466, 'temp_loss': 4.89899807709914, 'temp': 0.9935287498158111, 'alpha_loss': -18.947431970630173, 'alpha': 1.008808475274306, 'critic_loss': 101.79520511062893, 'actor_loss': -0.022018879960627246, 'time_step': 0.03186609335904996, 'td_error': 1.4586126040517953, 'init_value': -5.757528305053711, 'ave_value': -4.8871836073710035} step=169
2022-04-17 15:00.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.43 [info     ] CQL_20220417150032: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003420770520994649, 'time_algorithm_update': 0.031128722535082573, 'temp_loss': 4.941751832792745, 'temp': 0.9810018631128165, 'alpha_loss': -18.187682428303553, 'alpha': 1.0261141216966527, 'critic_loss': 70.98380866417519, 'actor_loss': 2.841521769585694, 'time_step': 0.03154826869626017, 'td_error': 1.1705545794609544, 'init_value': -7.4247918128967285, 'ave_value': -6.387667234480918} step=338
2022-04-17 15:00.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.49 [info     ] CQL_20220417150032: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00035666149748852973, 'time_algorithm_update': 0.03058125563627164, 'temp_loss': 4.887202127445379, 'temp': 0.9687772037009516, 'alpha_loss': -18.220842429166716, 'alpha': 1.0431848827903794, 'critic_loss': 98.80386907814523, 'actor_loss': 4.31454593613303, 'time_step': 0.031017541885375977, 'td_error': 1.2001472035324594, 'init_value': -9.25572395324707, 'ave_value': -8.095952230079755} step=507
2022-04-17 15:00.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:00.54 [info     ] CQL_20220417150032: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003531980796678532, 'time_algorithm_update': 0.030967417553331725, 'temp_loss': 4.828358706637952, 'temp': 0.9568105953684926, 'alpha_loss': -18.507976475551988, 'alpha': 1.0608680100130612, 'critic_loss': 134.3387420925163, 'actor_loss': 5.802788249134312, 'time_step': 0.03139411909340401, 'td_error': 1.1997617862261483, 'init_value': -10.73412799835205, 'ave_value': -9.374720436474224} step=676
2022-04-17 15:00.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.00 [info     ] CQL_20220417150032: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00036191093851123336, 'time_algorithm_update': 0.03031726278496917, 'temp_loss': 4.769756960445608, 'temp': 0.945075781035, 'alpha_loss': -18.866077456953963, 'alpha': 1.0791441322078366, 'critic_loss': 179.9360697368193, 'actor_loss': 6.98884299238758, 'time_step': 0.03075583164508526, 'td_error': 1.2518160638129816, 'init_value': -11.798190116882324, 'ave_value': -10.329795290285402} step=845
2022-04-17 15:01.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.05 [info     ] CQL_20220417150032: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003488585793760401, 'time_algorithm_update': 0.03061042311628895, 'temp_loss': 4.713739663186158, 'temp': 0.9335480596892227, 'alpha_loss': -19.22916993586975, 'alpha': 1.0979922946388199, 'critic_loss': 236.19067960660132, 'actor_loss': 7.887137455347727, 'time_step': 0.031039963107137285, 'td_error': 1.2712370142022698, 'init_value': -13.173858642578125, 'ave_value': -11.616246375779848} step=1014
2022-04-17 15:01.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150032/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:01.06 [info     ] FQE_20220417150105: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016178093947373428, 'time_algorithm_update': 0.004053134422797661, 'loss': 0.009454457954104458, 'time_step': 0.004284223952850738, 'init_value': -0.3635115623474121, 'ave_value': -0.35129663627426905, 'soft_opc': nan} step=77




2022-04-17 15:01.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.06 [info     ] FQE_20220417150105: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00015391003001819956, 'time_algorithm_update': 0.0038832162881826423, 'loss': 0.00664249195584229, 'time_step': 0.0041228703090122765, 'init_value': -0.4454702138900757, 'ave_value': -0.3942412488900863, 'soft_opc': nan} step=154




2022-04-17 15:01.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.06 [info     ] FQE_20220417150105: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016422085947804637, 'time_algorithm_update': 0.0042692500275450865, 'loss': 0.005123826929114081, 'time_step': 0.0045088483141614245, 'init_value': -0.5176480412483215, 'ave_value': -0.44000664049977656, 'soft_opc': nan} step=231




2022-04-17 15:01.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.07 [info     ] FQE_20220417150105: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00015511760463962306, 'time_algorithm_update': 0.0037778755287071327, 'loss': 0.004203162981535901, 'time_step': 0.004006618029111391, 'init_value': -0.5403017401695251, 'ave_value': -0.44087663764083707, 'soft_opc': nan} step=308




2022-04-17 15:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.07 [info     ] FQE_20220417150105: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016641616821289062, 'time_algorithm_update': 0.004273414611816406, 'loss': 0.003984945617591986, 'time_step': 0.004528500817038796, 'init_value': -0.521987795829773, 'ave_value': -0.3870418387288983, 'soft_opc': nan} step=385




2022-04-17 15:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.07 [info     ] FQE_20220417150105: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016095731165501978, 'time_algorithm_update': 0.004064844800280286, 'loss': 0.003960462609100361, 'time_step': 0.004297556815209326, 'init_value': -0.5570069551467896, 'ave_value': -0.3948385999613517, 'soft_opc': nan} step=462




2022-04-17 15:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.08 [info     ] FQE_20220417150105: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00015347344534737722, 'time_algorithm_update': 0.0039520170781519506, 'loss': 0.003677281387779233, 'time_step': 0.004171439579554966, 'init_value': -0.5943514108657837, 'ave_value': -0.4062064167884019, 'soft_opc': nan} step=539




2022-04-17 15:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.08 [info     ] FQE_20220417150105: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.0001716861477145901, 'time_algorithm_update': 0.0044022999800644915, 'loss': 0.0033684611931298074, 'time_step': 0.0046780821564909696, 'init_value': -0.6181368827819824, 'ave_value': -0.4216199548126341, 'soft_opc': nan} step=616




2022-04-17 15:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.09 [info     ] FQE_20220417150105: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001765102535099178, 'time_algorithm_update': 0.004616189312625241, 'loss': 0.00328981967540095, 'time_step': 0.0048653986546900365, 'init_value': -0.6153018474578857, 'ave_value': -0.41582591475264447, 'soft_opc': nan} step=693




2022-04-17 15:01.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.09 [info     ] FQE_20220417150105: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001613660292191939, 'time_algorithm_update': 0.0040410772546545255, 'loss': 0.0032435229876647135, 'time_step': 0.004288165600268872, 'init_value': -0.6355737447738647, 'ave_value': -0.4083234530311447, 'soft_opc': nan} step=770




2022-04-17 15:01.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150105/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/88 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:01.10 [info     ] FQE_20220417150109: epoch=1 step=88 epoch=1 metrics={'time_sample_batch': 0.00016789544712413442, 'time_algorithm_update': 0.003999631513248791, 'loss': 0.008716322476340627, 'time_step': 0.0042338750579140405, 'init_value': -0.11124956607818604, 'ave_value': -0.1149340192552421, 'soft_opc': nan} step=88




2022-04-17 15:01.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_88.pt


Epoch 2/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.10 [info     ] FQE_20220417150109: epoch=2 step=176 epoch=2 metrics={'time_sample_batch': 0.00015966187823902476, 'time_algorithm_update': 0.003910771825096824, 'loss': 0.005269820733122866, 'time_step': 0.004142251881686124, 'init_value': -0.14419563114643097, 'ave_value': -0.137385473131865, 'soft_opc': nan} step=176




2022-04-17 15:01.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_176.pt


Epoch 3/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.10 [info     ] FQE_20220417150109: epoch=3 step=264 epoch=3 metrics={'time_sample_batch': 0.00016916610977866432, 'time_algorithm_update': 0.004190631888129495, 'loss': 0.0037976056945891205, 'time_step': 0.004448362372138284, 'init_value': -0.16712665557861328, 'ave_value': -0.15395425747207425, 'soft_opc': nan} step=264




2022-04-17 15:01.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_264.pt


Epoch 4/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.11 [info     ] FQE_20220417150109: epoch=4 step=352 epoch=4 metrics={'time_sample_batch': 0.00016503984277898616, 'time_algorithm_update': 0.004081704399802468, 'loss': 0.0031844719461250033, 'time_step': 0.004333869977430863, 'init_value': -0.1592855006456375, 'ave_value': -0.14179428686631154, 'soft_opc': nan} step=352




2022-04-17 15:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_352.pt


Epoch 5/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.11 [info     ] FQE_20220417150109: epoch=5 step=440 epoch=5 metrics={'time_sample_batch': 0.00017502362077886409, 'time_algorithm_update': 0.004120525988665494, 'loss': 0.002883685772592964, 'time_step': 0.0043932768431576815, 'init_value': -0.16499991714954376, 'ave_value': -0.14953460664748475, 'soft_opc': nan} step=440




2022-04-17 15:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_440.pt


Epoch 6/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.12 [info     ] FQE_20220417150109: epoch=6 step=528 epoch=6 metrics={'time_sample_batch': 0.0001643272963437167, 'time_algorithm_update': 0.004045792601325295, 'loss': 0.0026333654823247343, 'time_step': 0.00430248813195662, 'init_value': -0.18538928031921387, 'ave_value': -0.1677530139021844, 'soft_opc': nan} step=528




2022-04-17 15:01.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_528.pt


Epoch 7/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.12 [info     ] FQE_20220417150109: epoch=7 step=616 epoch=7 metrics={'time_sample_batch': 0.00017449259757995605, 'time_algorithm_update': 0.004428505897521973, 'loss': 0.0024656762662661176, 'time_step': 0.004680682312358509, 'init_value': -0.20428462326526642, 'ave_value': -0.18874913529232815, 'soft_opc': nan} step=616




2022-04-17 15:01.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_616.pt


Epoch 8/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.12 [info     ] FQE_20220417150109: epoch=8 step=704 epoch=8 metrics={'time_sample_batch': 0.00015417283231561834, 'time_algorithm_update': 0.003949902274391868, 'loss': 0.0022599955105265094, 'time_step': 0.0041766871105540886, 'init_value': -0.2353605329990387, 'ave_value': -0.22612629370881362, 'soft_opc': nan} step=704




2022-04-17 15:01.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_704.pt


Epoch 9/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.13 [info     ] FQE_20220417150109: epoch=9 step=792 epoch=9 metrics={'time_sample_batch': 0.00016579302874478427, 'time_algorithm_update': 0.004235538569363681, 'loss': 0.0021358966021745637, 'time_step': 0.004486517472700639, 'init_value': -0.20379380881786346, 'ave_value': -0.194660359049669, 'soft_opc': nan} step=792




2022-04-17 15:01.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_792.pt


Epoch 10/10:   0%|          | 0/88 [00:00<?, ?it/s]



2022-04-17 15:01.13 [info     ] FQE_20220417150109: epoch=10 step=880 epoch=10 metrics={'time_sample_batch': 0.0001738938418301669, 'time_algorithm_update': 0.004371456124565818, 'loss': 0.0021077117860179647, 'time_step': 0.004613277587023648, 'init_value': -0.2210514098405838, 'ave_value': -0.21592855603509659, 'soft_opc': nan} step=880




2022-04-17 15:01.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150109/model_880.pt
search iteration:  15
using hyper params:  [0.002678630928491801, 0.008944695496308606, 8.920707009949469e-05, 5]
2022-04-17 15:01.13 [debug    ] RoundIterator is selected.
2022-04-17 15:01.13 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150113
2022-04-17 15:01.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:01.13 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:01.13 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150113/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.002678630928491801, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.19 [info     ] CQL_20220417150113: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00035824013884956315, 'time_algorithm_update': 0.02970000272671852, 'temp_loss': 4.645892309719289, 'temp': 0.9919602916790888, 'alpha_loss': -18.807235119610848, 'alpha': 1.0087060568600716, 'critic_loss': 78.76470299444254, 'actor_loss': 1.6056703559877956, 'time_step': 0.030130885761870434, 'td_error': 1.3890726524601789, 'init_value': -6.2256855964660645, 'ave_value': -5.325605715298438} step=169
2022-04-17 15:01.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.24 [info     ] CQL_20220417150113: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003397422429372573, 'time_algorithm_update': 0.030725968659982172, 'temp_loss': 4.905613828692916, 'temp': 0.9763466652328446, 'alpha_loss': -17.89185317451432, 'alpha': 1.0253301202898195, 'critic_loss': 81.42819940691163, 'actor_loss': 3.1636242104705268, 'time_step': 0.031144936409222303, 'td_error': 1.3002364613725692, 'init_value': -7.252917289733887, 'ave_value': -6.1917898743431845} step=338
2022-04-17 15:01.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.29 [info     ] CQL_20220417150113: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00033718735508664826, 'time_algorithm_update': 0.03024941647546531, 'temp_loss': 4.8466944497012525, 'temp': 0.9614788159816223, 'alpha_loss': -18.18018097284983, 'alpha': 1.0424845818231796, 'critic_loss': 119.68971351758968, 'actor_loss': 3.8811312145030006, 'time_step': 0.030658837606215617, 'td_error': 1.2764070359178972, 'init_value': -8.852079391479492, 'ave_value': -7.551872887761744} step=507
2022-04-17 15:01.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.35 [info     ] CQL_20220417150113: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00035478659635464823, 'time_algorithm_update': 0.030727042248968543, 'temp_loss': 4.78124475479126, 'temp': 0.9470941244497807, 'alpha_loss': -18.54224501841167, 'alpha': 1.0602949737797123, 'critic_loss': 178.1458113630848, 'actor_loss': 4.3031802868702, 'time_step': 0.03115935297407342, 'td_error': 1.1350856586514657, 'init_value': -8.619590759277344, 'ave_value': -7.408186513862094} step=676
2022-04-17 15:01.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.40 [info     ] CQL_20220417150113: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003428134692491159, 'time_algorithm_update': 0.030310152550420816, 'temp_loss': 4.711308831999288, 'temp': 0.9330886849284877, 'alpha_loss': -18.826352102516672, 'alpha': 1.078670677348707, 'critic_loss': 261.51518782215004, 'actor_loss': 4.06455301533084, 'time_step': 0.030732690935304178, 'td_error': 1.0913419267081543, 'init_value': -7.600546360015869, 'ave_value': -6.5117268698924295} step=845
2022-04-17 15:01.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.45 [info     ] CQL_20220417150113: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003343644226796528, 'time_algorithm_update': 0.029589133855153823, 'temp_loss': 4.641813958184959, 'temp': 0.9194172818279831, 'alpha_loss': -19.14410450049406, 'alpha': 1.0975298930907391, 'critic_loss': 368.12458882529353, 'actor_loss': 3.3125893321968394, 'time_step': 0.029997082151604827, 'td_error': 1.0617948855419679, 'init_value': -5.9180755615234375, 'ave_value': -5.203492601519232} step=1014
2022-04-17 15:01.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150113/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.0000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:01.46 [info     ] FQE_20220417150146: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016340652069488128, 'time_algorithm_update': 0.0041652778526405235, 'loss': 0.011420539670998786, 'time_step': 0.004405284856821036, 'init_value': -0.31667816638946533, 'ave_value': -0.26333691775127577, 'soft_opc': nan} step=77




2022-04-17 15:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.46 [info     ] FQE_20220417150146: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001681470251702643, 'time_algorithm_update': 0.004295271712464172, 'loss': 0.0076514835916943365, 'time_step': 0.004557866554755669, 'init_value': -0.4205355644226074, 'ave_value': -0.3285799203625133, 'soft_opc': nan} step=154




2022-04-17 15:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.47 [info     ] FQE_20220417150146: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00018546798012473366, 'time_algorithm_update': 0.0044382857037829114, 'loss': 0.0056799395741628745, 'time_step': 0.004727713473431476, 'init_value': -0.469788521528244, 'ave_value': -0.34951405844709893, 'soft_opc': nan} step=231




2022-04-17 15:01.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.47 [info     ] FQE_20220417150146: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017477939655254413, 'time_algorithm_update': 0.004430470528540674, 'loss': 0.00463402085610992, 'time_step': 0.004683460508074079, 'init_value': -0.5039652585983276, 'ave_value': -0.3767117262893432, 'soft_opc': nan} step=308




2022-04-17 15:01.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.48 [info     ] FQE_20220417150146: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016768876608316, 'time_algorithm_update': 0.004227245008790648, 'loss': 0.004318285391774851, 'time_step': 0.004474079454099977, 'init_value': -0.49573659896850586, 'ave_value': -0.35548309254887944, 'soft_opc': nan} step=385




2022-04-17 15:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.48 [info     ] FQE_20220417150146: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00017045070598651837, 'time_algorithm_update': 0.004428330954019125, 'loss': 0.004147337398842558, 'time_step': 0.004696818141194133, 'init_value': -0.5492537617683411, 'ave_value': -0.3808556001381697, 'soft_opc': nan} step=462




2022-04-17 15:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.48 [info     ] FQE_20220417150146: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001663975901417918, 'time_algorithm_update': 0.0045044731784176515, 'loss': 0.003951032782070242, 'time_step': 0.004742306548279601, 'init_value': -0.5636061429977417, 'ave_value': -0.37348730904427735, 'soft_opc': nan} step=539




2022-04-17 15:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.49 [info     ] FQE_20220417150146: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015095302036830356, 'time_algorithm_update': 0.003917105786212079, 'loss': 0.003804977937879694, 'time_step': 0.004143832565902115, 'init_value': -0.6097308993339539, 'ave_value': -0.41014486746610823, 'soft_opc': nan} step=616




2022-04-17 15:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.49 [info     ] FQE_20220417150146: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.0001582851657619724, 'time_algorithm_update': 0.0040230782001049485, 'loss': 0.0035810092246377624, 'time_step': 0.004250399478070148, 'init_value': -0.6127648949623108, 'ave_value': -0.40695196079375506, 'soft_opc': nan} step=693




2022-04-17 15:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.50 [info     ] FQE_20220417150146: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017057146344866072, 'time_algorithm_update': 0.004323346274239677, 'loss': 0.0036654477219351315, 'time_step': 0.0045774633234197445, 'init_value': -0.6573132276535034, 'ave_value': -0.42910866311810036, 'soft_opc': nan} step=770




2022-04-17 15:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150146/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:01.50 [info     ] FQE_20220417150150: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016258908556653306, 'time_algorithm_update': 0.004143036805190049, 'loss': 0.0015139485147703051, 'time_step': 0.0043957759807636216, 'init_value': 0.004886331968009472, 'ave_value': 0.04251825442403421, 'soft_opc': nan} step=77




2022-04-17 15:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.50 [info     ] FQE_20220417150150: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016387097247235187, 'time_algorithm_update': 0.004051103220357523, 'loss': 0.0010037147799089503, 'time_step': 0.004297742595920315, 'init_value': -0.06767253577709198, 'ave_value': -0.0035370659326446484, 'soft_opc': nan} step=154




2022-04-17 15:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.51 [info     ] FQE_20220417150150: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017464006101930297, 'time_algorithm_update': 0.004343299122599812, 'loss': 0.0009061396280726926, 'time_step': 0.004613179665107232, 'init_value': -0.1209164559841156, 'ave_value': -0.03500165945120357, 'soft_opc': nan} step=231




2022-04-17 15:01.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.51 [info     ] FQE_20220417150150: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016212773013424563, 'time_algorithm_update': 0.004049493120862292, 'loss': 0.0007199945118030315, 'time_step': 0.004296733187390612, 'init_value': -0.14155638217926025, 'ave_value': -0.04025346640296079, 'soft_opc': nan} step=308




2022-04-17 15:01.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.52 [info     ] FQE_20220417150150: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017355014751483869, 'time_algorithm_update': 0.00428029778715852, 'loss': 0.0006333827443629527, 'time_step': 0.004558578714147791, 'init_value': -0.16161075234413147, 'ave_value': -0.05012182758802229, 'soft_opc': nan} step=385




2022-04-17 15:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.52 [info     ] FQE_20220417150150: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016111212891417665, 'time_algorithm_update': 0.003944731377936029, 'loss': 0.0006345605261112841, 'time_step': 0.004196284653304459, 'init_value': -0.20784595608711243, 'ave_value': -0.08258172428520681, 'soft_opc': nan} step=462




2022-04-17 15:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.52 [info     ] FQE_20220417150150: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00014931195742124087, 'time_algorithm_update': 0.0037320403309611533, 'loss': 0.0005981604071249338, 'time_step': 0.0039556026458740234, 'init_value': -0.2354002147912979, 'ave_value': -0.09661973290279642, 'soft_opc': nan} step=539




2022-04-17 15:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.53 [info     ] FQE_20220417150150: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015648928555575284, 'time_algorithm_update': 0.0038587303904743936, 'loss': 0.0005729013512307149, 'time_step': 0.004081116094217672, 'init_value': -0.26787126064300537, 'ave_value': -0.11770990283505336, 'soft_opc': nan} step=616




2022-04-17 15:01.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.53 [info     ] FQE_20220417150150: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015209866808606431, 'time_algorithm_update': 0.003908922145893047, 'loss': 0.0005071414577889607, 'time_step': 0.004131475052276215, 'init_value': -0.280234158039093, 'ave_value': -0.12503469110307125, 'soft_opc': nan} step=693




2022-04-17 15:01.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:01.53 [info     ] FQE_20220417150150: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001626819759220272, 'time_algorithm_update': 0.004084265077268922, 'loss': 0.0005462078198823812, 'time_step': 0.0043169739958527804, 'init_value': -0.318446546792984, 'ave_value': -0.15347403812616525, 'soft_opc': nan} step=770




2022-04-17 15:01.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150150/model_770.pt
search iteration:  16
using hyper params:  [0.007821207716918833, 0.007043225059234194, 4.5152733707784914e-05, 3]
2022-04-17 15:01.53 [debug    ] RoundIterator is selected.
2022-04-17 15:01.53 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150153
2022-04-17 15:01.53 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:01.53 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:01.53 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150153/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.007821207716918833, 'actor_optim_factory': {'optim_

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:01.59 [info     ] CQL_20220417150153: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003214968732122839, 'time_algorithm_update': 0.03144237275659685, 'temp_loss': 4.894855961997128, 'temp': 0.9960476798418711, 'alpha_loss': -18.71791398313624, 'alpha': 1.0087265058382022, 'critic_loss': 49.899053110878846, 'actor_loss': -0.30951015507945645, 'time_step': 0.03184606196612296, 'td_error': 0.9100365016804188, 'init_value': -3.0559611320495605, 'ave_value': -2.5174229934167216} step=169
2022-04-17 15:01.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.05 [info     ] CQL_20220417150153: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003289414580757096, 'time_algorithm_update': 0.0310143719058065, 'temp_loss': 4.982778969601061, 'temp': 0.9883291301642649, 'alpha_loss': -18.01524890245065, 'alpha': 1.0255344908618362, 'critic_loss': 48.65274144911907, 'actor_loss': 0.5832879224450455, 'time_step': 0.03142255862083661, 'td_error': 1.0163585544841218, 'init_value': -4.327934265136719, 'ave_value': -3.5378738201255198} step=338
2022-04-17 15:02.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.10 [info     ] CQL_20220417150153: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.000318994183512129, 'time_algorithm_update': 0.030533899216962285, 'temp_loss': 4.94721223334589, 'temp': 0.9807814183319814, 'alpha_loss': -18.186806334546333, 'alpha': 1.0427379636369514, 'critic_loss': 67.46881150917189, 'actor_loss': 1.2884925579178264, 'time_step': 0.03092946385490824, 'td_error': 1.0316902194897744, 'init_value': -5.1542181968688965, 'ave_value': -4.197714335188135} step=507
2022-04-17 15:02.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.15 [info     ] CQL_20220417150153: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003125230236166328, 'time_algorithm_update': 0.03070255849488388, 'temp_loss': 4.910370643322285, 'temp': 0.9733504307340588, 'alpha_loss': -18.513995684110203, 'alpha': 1.0605461971294246, 'critic_loss': 96.02602214926094, 'actor_loss': 1.7668769281996777, 'time_step': 0.03109772811979937, 'td_error': 1.0538215795571686, 'init_value': -5.782269477844238, 'ave_value': -4.663226873090675} step=676
2022-04-17 15:02.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.21 [info     ] CQL_20220417150153: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003182930353830552, 'time_algorithm_update': 0.031189943911761223, 'temp_loss': 4.877369742421709, 'temp': 0.9660105941563668, 'alpha_loss': -18.835424490934294, 'alpha': 1.0789131292930016, 'critic_loss': 136.30167655267658, 'actor_loss': 1.9627602911559787, 'time_step': 0.03159078761670717, 'td_error': 1.0610386333434934, 'init_value': -5.713486671447754, 'ave_value': -4.625064666346387} step=845
2022-04-17 15:02.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.26 [info     ] CQL_20220417150153: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00031551524732239855, 'time_algorithm_update': 0.031240134549564157, 'temp_loss': 4.839939306473591, 'temp': 0.9587545183283337, 'alpha_loss': -19.17011337731717, 'alpha': 1.0977906531836155, 'critic_loss': 190.01093775280833, 'actor_loss': 1.867015212950622, 'time_step': 0.03163892136523004, 'td_error': 1.0132339626710634, 'init_value': -5.127198219299316, 'ave_value': -4.243586193441271} step=1014
2022-04-17 15:02.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150153/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:02.27 [info     ] FQE_20220417150227: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00017793147594897778, 'time_algorithm_update': 0.004253300753506747, 'loss': 0.01266976875289307, 'time_step': 0.004507436380757914, 'init_value': -0.30654463171958923, 'ave_value': -0.28290208364660674, 'soft_opc': nan} step=77




2022-04-17 15:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.27 [info     ] FQE_20220417150227: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001653231583632432, 'time_algorithm_update': 0.00414260331686441, 'loss': 0.00953823219162303, 'time_step': 0.0043954477681742085, 'init_value': -0.467510461807251, 'ave_value': -0.41292362321872966, 'soft_opc': nan} step=154




2022-04-17 15:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.28 [info     ] FQE_20220417150227: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.000158817737133472, 'time_algorithm_update': 0.004155803036380124, 'loss': 0.00781723368888745, 'time_step': 0.004383204819320084, 'init_value': -0.5636094212532043, 'ave_value': -0.4742537543848828, 'soft_opc': nan} step=231




2022-04-17 15:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.28 [info     ] FQE_20220417150227: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00014835828310483462, 'time_algorithm_update': 0.0038252000684862015, 'loss': 0.006093706373222076, 'time_step': 0.004040838836075424, 'init_value': -0.6896295547485352, 'ave_value': -0.6017190059563061, 'soft_opc': nan} step=308




2022-04-17 15:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.28 [info     ] FQE_20220417150227: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016990884557946935, 'time_algorithm_update': 0.004398104432341341, 'loss': 0.00568206260258985, 'time_step': 0.0046521378802014636, 'init_value': -0.6587453484535217, 'ave_value': -0.5354635991625958, 'soft_opc': nan} step=385




2022-04-17 15:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.29 [info     ] FQE_20220417150227: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00016203793612393466, 'time_algorithm_update': 0.004424098250153777, 'loss': 0.005556123790802893, 'time_step': 0.004671561253535283, 'init_value': -0.7002733945846558, 'ave_value': -0.5547207765482568, 'soft_opc': nan} step=462




2022-04-17 15:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.29 [info     ] FQE_20220417150227: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016893659319196428, 'time_algorithm_update': 0.004239042083938401, 'loss': 0.005000916506200061, 'time_step': 0.004489784116868849, 'init_value': -0.7265254259109497, 'ave_value': -0.5595874556952769, 'soft_opc': nan} step=539




2022-04-17 15:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.30 [info     ] FQE_20220417150227: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00017347273888526025, 'time_algorithm_update': 0.004238249419571518, 'loss': 0.004728862184232899, 'time_step': 0.00451554570879255, 'init_value': -0.774499773979187, 'ave_value': -0.5960899263083398, 'soft_opc': nan} step=616




2022-04-17 15:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.30 [info     ] FQE_20220417150227: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016457693917410715, 'time_algorithm_update': 0.004218101501464844, 'loss': 0.004521503563752615, 'time_step': 0.0044545012635070005, 'init_value': -0.7553375363349915, 'ave_value': -0.573287095961807, 'soft_opc': nan} step=693




2022-04-17 15:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.30 [info     ] FQE_20220417150227: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016677844059931769, 'time_algorithm_update': 0.004394051316496614, 'loss': 0.00454005975179471, 'time_step': 0.004645050346077263, 'init_value': -0.7769871950149536, 'ave_value': -0.5753201826332925, 'soft_opc': nan} step=770




2022-04-17 15:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150227/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:02.31 [info     ] FQE_20220417150231: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015446737215116425, 'time_algorithm_update': 0.003959250140499759, 'loss': 0.010371808023131514, 'time_step': 0.004177839725048511, 'init_value': -0.18656080961227417, 'ave_value': -0.1485327440657997, 'soft_opc': nan} step=77




2022-04-17 15:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.31 [info     ] FQE_20220417150231: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017307950304700181, 'time_algorithm_update': 0.004220894404820034, 'loss': 0.006576174978989285, 'time_step': 0.004493385166316838, 'init_value': -0.24552670121192932, 'ave_value': -0.17998659121761987, 'soft_opc': nan} step=154




2022-04-17 15:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.32 [info     ] FQE_20220417150231: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00017485370883693944, 'time_algorithm_update': 0.004455086472746614, 'loss': 0.004572887926108458, 'time_step': 0.004738244143399325, 'init_value': -0.27370935678482056, 'ave_value': -0.19919690602198914, 'soft_opc': nan} step=231




2022-04-17 15:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.32 [info     ] FQE_20220417150231: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017156848659763088, 'time_algorithm_update': 0.004339555641273399, 'loss': 0.003683248060066011, 'time_step': 0.004596914563860212, 'init_value': -0.26501476764678955, 'ave_value': -0.19602319634611812, 'soft_opc': nan} step=308




2022-04-17 15:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.33 [info     ] FQE_20220417150231: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015873103946834416, 'time_algorithm_update': 0.003951490699470817, 'loss': 0.0032683824954022256, 'time_step': 0.004194141982437729, 'init_value': -0.24974775314331055, 'ave_value': -0.18497919863643678, 'soft_opc': nan} step=385




2022-04-17 15:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.33 [info     ] FQE_20220417150231: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001668465601933467, 'time_algorithm_update': 0.004264503330379338, 'loss': 0.0031158330429998504, 'time_step': 0.004506213324410575, 'init_value': -0.27495452761650085, 'ave_value': -0.20847254878933633, 'soft_opc': nan} step=462




2022-04-17 15:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.33 [info     ] FQE_20220417150231: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001629823214047915, 'time_algorithm_update': 0.004252018866600928, 'loss': 0.0027000803391558008, 'time_step': 0.00449128584428267, 'init_value': -0.26965004205703735, 'ave_value': -0.21614497813924743, 'soft_opc': nan} step=539




2022-04-17 15:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.34 [info     ] FQE_20220417150231: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016575045399851613, 'time_algorithm_update': 0.0042749287246109605, 'loss': 0.0024734721246913267, 'time_step': 0.004506538440654804, 'init_value': -0.2608543038368225, 'ave_value': -0.2019153251337844, 'soft_opc': nan} step=616




2022-04-17 15:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.34 [info     ] FQE_20220417150231: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016540366333800478, 'time_algorithm_update': 0.004541994689346908, 'loss': 0.0024295327965738614, 'time_step': 0.004782038849669617, 'init_value': -0.2716020941734314, 'ave_value': -0.219265153633246, 'soft_opc': nan} step=693




2022-04-17 15:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:02.34 [info     ] FQE_20220417150231: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016407223490925578, 'time_algorithm_update': 0.0041666990750795834, 'loss': 0.00236925560460946, 'time_step': 0.004401857202703302, 'init_value': -0.2933236360549927, 'ave_value': -0.24417155636726198, 'soft_opc': nan} step=770




2022-04-17 15:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150231/model_770.pt
search iteration:  17
using hyper params:  [0.00949602426661441, 0.009381639039016527, 8.087059824696625e-05, 7]
2022-04-17 15:02.35 [debug    ] RoundIterator is selected.
2022-04-17 15:02.35 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150235
2022-04-17 15:02.35 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:02.35 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:02.35 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150235/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.00949602426661441, 'actor_optim_factory': {'optim_cls

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.39 [info     ] CQL_20220417150235: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00032118650583120494, 'time_algorithm_update': 0.024932845809755945, 'temp_loss': 4.851120384487174, 'temp': 0.993031900309952, 'alpha_loss': -21.8110829415406, 'alpha': 1.0090550536940084, 'critic_loss': 109.38125348514353, 'actor_loss': 2.625426816825683, 'time_step': 0.025320789517735587, 'td_error': 0.9144799022696579, 'init_value': -8.21080207824707, 'ave_value': -6.8790434998864525} step=169
2022-04-17 15:02.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.43 [info     ] CQL_20220417150235: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.0003052166933138695, 'time_algorithm_update': 0.024111663096049833, 'temp_loss': 4.926771530738244, 'temp': 0.9793596599228989, 'alpha_loss': -19.621631915752705, 'alpha': 1.025468729656829, 'critic_loss': 108.47717565051197, 'actor_loss': 5.134510562264707, 'time_step': 0.024483960055740626, 'td_error': 0.9060618129498356, 'init_value': -10.116050720214844, 'ave_value': -8.726891375984158} step=338
2022-04-17 15:02.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.49 [info     ] CQL_20220417150235: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00035645270488671296, 'time_algorithm_update': 0.03086423309597038, 'temp_loss': 4.862604451602732, 'temp': 0.9660472580667078, 'alpha_loss': -19.358215806046886, 'alpha': 1.0418447129119783, 'critic_loss': 159.20844661554642, 'actor_loss': 6.452600202616855, 'time_step': 0.03129783043494591, 'td_error': 1.087683169635499, 'init_value': -12.215757369995117, 'ave_value': -10.398279260643967} step=507
2022-04-17 15:02.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:02.54 [info     ] CQL_20220417150235: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003564188466269589, 'time_algorithm_update': 0.031125592056816146, 'temp_loss': 4.803873335821389, 'temp': 0.9530529520920747, 'alpha_loss': -19.172358033219737, 'alpha': 1.058709656698464, 'critic_loss': 234.23757176145293, 'actor_loss': 7.254431315427701, 'time_step': 0.03156434149431759, 'td_error': 1.2091859949663197, 'init_value': -13.100744247436523, 'ave_value': -11.36033359727344} step=676
2022-04-17 15:02.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.00 [info     ] CQL_20220417150235: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.0003746289473313552, 'time_algorithm_update': 0.030542032253107376, 'temp_loss': 4.742452122050629, 'temp': 0.9403325858906176, 'alpha_loss': -19.09055416400616, 'alpha': 1.0760512556550066, 'critic_loss': 337.1198741303393, 'actor_loss': 7.364900972716201, 'time_step': 0.031000265708336465, 'td_error': 1.13996746916265, 'init_value': -11.61423110961914, 'ave_value': -10.013789086255942} step=845
2022-04-17 15:03.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.05 [info     ] CQL_20220417150235: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003679094935309957, 'time_algorithm_update': 0.03079085660404002, 'temp_loss': 4.684212899066993, 'temp': 0.9278534414500175, 'alpha_loss': -19.209222353421726, 'alpha': 1.0938769099275036, 'critic_loss': 469.78514812401767, 'actor_loss': 6.484162914682422, 'time_step': 0.03124249334166036, 'td_error': 1.104807068881489, 'init_value': -9.70817756652832, 'ave_value': -8.629318408235774} step=1014
2022-04-17 15:03.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150235/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000e-01

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:03.06 [info     ] FQE_20220417150305: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00017691897107409194, 'time_algorithm_update': 0.004530197614199156, 'loss': 0.007358035870960781, 'time_step': 0.004791213320447253, 'init_value': -0.3609863817691803, 'ave_value': -0.32670712860989143, 'soft_opc': nan} step=77




2022-04-17 15:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.06 [info     ] FQE_20220417150305: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017748869858778916, 'time_algorithm_update': 0.004408554597334428, 'loss': 0.0051140600252993305, 'time_step': 0.004673332362980037, 'init_value': -0.48563894629478455, 'ave_value': -0.4118163561163185, 'soft_opc': nan} step=154




2022-04-17 15:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.06 [info     ] FQE_20220417150305: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015789502626889712, 'time_algorithm_update': 0.0040567819174233966, 'loss': 0.003919130901793961, 'time_step': 0.00429503329388507, 'init_value': -0.579077422618866, 'ave_value': -0.4690531268782981, 'soft_opc': nan} step=231




2022-04-17 15:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.07 [info     ] FQE_20220417150305: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016117095947265625, 'time_algorithm_update': 0.004134432061926111, 'loss': 0.0030648407205794152, 'time_step': 0.004374730122553838, 'init_value': -0.6108182072639465, 'ave_value': -0.4894607978763881, 'soft_opc': nan} step=308




2022-04-17 15:03.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.07 [info     ] FQE_20220417150305: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001761417884331245, 'time_algorithm_update': 0.004405876258751015, 'loss': 0.0028825284015121205, 'time_step': 0.004661414530370143, 'init_value': -0.5975546836853027, 'ave_value': -0.4430664755672485, 'soft_opc': nan} step=385




2022-04-17 15:03.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.08 [info     ] FQE_20220417150305: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00015717048149604303, 'time_algorithm_update': 0.004043219925521256, 'loss': 0.0027948063888962008, 'time_step': 0.004275938133140663, 'init_value': -0.6663700938224792, 'ave_value': -0.47921271863150167, 'soft_opc': nan} step=462




2022-04-17 15:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.08 [info     ] FQE_20220417150305: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016579999552144631, 'time_algorithm_update': 0.004298950170541739, 'loss': 0.00264776367754615, 'time_step': 0.004537204643348594, 'init_value': -0.7319265604019165, 'ave_value': -0.5152542825754698, 'soft_opc': nan} step=539




2022-04-17 15:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.08 [info     ] FQE_20220417150305: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015401530575442625, 'time_algorithm_update': 0.0038694685155695133, 'loss': 0.0025240967047170967, 'time_step': 0.004100976052222314, 'init_value': -0.7853091359138489, 'ave_value': -0.5478172751495967, 'soft_opc': nan} step=616




2022-04-17 15:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.09 [info     ] FQE_20220417150305: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016445927805714794, 'time_algorithm_update': 0.004144959635548778, 'loss': 0.0024123057970994867, 'time_step': 0.004397568764624658, 'init_value': -0.7897553443908691, 'ave_value': -0.5418032226597403, 'soft_opc': nan} step=693




2022-04-17 15:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.09 [info     ] FQE_20220417150305: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.0001741198750285359, 'time_algorithm_update': 0.004496880940028599, 'loss': 0.0024628862194329888, 'time_step': 0.004759457204248998, 'init_value': -0.8450596332550049, 'ave_value': -0.5685102467869854, 'soft_opc': nan} step=770




2022-04-17 15:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150305/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:03.10 [info     ] FQE_20220417150309: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001713022009118811, 'time_algorithm_update': 0.004304254209840453, 'loss': 0.009502838807204714, 'time_step': 0.004554457478709035, 'init_value': -0.3907187879085541, 'ave_value': -0.3710210823126741, 'soft_opc': nan} step=77




2022-04-17 15:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.10 [info     ] FQE_20220417150309: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00017364613421551594, 'time_algorithm_update': 0.0044176857192795, 'loss': 0.006564516725333093, 'time_step': 0.004678998674665179, 'init_value': -0.47212281823158264, 'ave_value': -0.43194631334092165, 'soft_opc': nan} step=154




2022-04-17 15:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.10 [info     ] FQE_20220417150309: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.000179290771484375, 'time_algorithm_update': 0.004562213823392794, 'loss': 0.00515068394688333, 'time_step': 0.0048637978442303545, 'init_value': -0.5075687170028687, 'ave_value': -0.4493557406841098, 'soft_opc': nan} step=231




2022-04-17 15:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.11 [info     ] FQE_20220417150309: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.0001663046997862977, 'time_algorithm_update': 0.00416623152695693, 'loss': 0.004069827309531438, 'time_step': 0.004415747407194856, 'init_value': -0.518727719783783, 'ave_value': -0.4555298147840543, 'soft_opc': nan} step=308




2022-04-17 15:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.11 [info     ] FQE_20220417150309: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001792319409258954, 'time_algorithm_update': 0.004366580542031821, 'loss': 0.0037807497852058573, 'time_step': 0.004642186226782861, 'init_value': -0.5120471715927124, 'ave_value': -0.434645500295871, 'soft_opc': nan} step=385




2022-04-17 15:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.12 [info     ] FQE_20220417150309: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.00017208557624321478, 'time_algorithm_update': 0.004229579653058734, 'loss': 0.0034505101782572154, 'time_step': 0.004486644422853148, 'init_value': -0.5568424463272095, 'ave_value': -0.47556473011369105, 'soft_opc': nan} step=462




2022-04-17 15:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.12 [info     ] FQE_20220417150309: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.0001682553972516741, 'time_algorithm_update': 0.004170866755696086, 'loss': 0.0030783972681754017, 'time_step': 0.0044076195010891205, 'init_value': -0.5962565541267395, 'ave_value': -0.4977815894661723, 'soft_opc': nan} step=539




2022-04-17 15:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.12 [info     ] FQE_20220417150309: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00016358610871550325, 'time_algorithm_update': 0.004246851066490273, 'loss': 0.0028547064624968675, 'time_step': 0.004484309778585062, 'init_value': -0.6298821568489075, 'ave_value': -0.5220326360952747, 'soft_opc': nan} step=616




2022-04-17 15:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.13 [info     ] FQE_20220417150309: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00015579260788954697, 'time_algorithm_update': 0.003982655413739093, 'loss': 0.002659580166838676, 'time_step': 0.004208029090584099, 'init_value': -0.6172506213188171, 'ave_value': -0.5128116554102382, 'soft_opc': nan} step=693




2022-04-17 15:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.13 [info     ] FQE_20220417150309: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016546868658685066, 'time_algorithm_update': 0.004222823427869128, 'loss': 0.002645008045236592, 'time_step': 0.004469787919676149, 'init_value': -0.6372475028038025, 'ave_value': -0.5225691927043168, 'soft_opc': nan} step=770




2022-04-17 15:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150309/model_770.pt
search iteration:  18
using hyper params:  [0.001580779792392886, 0.005156922125841681, 4.693326018957822e-05, 7]
2022-04-17 15:03.13 [debug    ] RoundIterator is selected.
2022-04-17 15:03.13 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150313
2022-04-17 15:03.13 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:03.13 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:03.13 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150313/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.001580779792392886, 'actor_optim_factory': {'optim_c

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.19 [info     ] CQL_20220417150313: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.0003618672049257177, 'time_algorithm_update': 0.031144293102286977, 'temp_loss': 4.608243170574572, 'temp': 0.99582773905534, 'alpha_loss': -19.413406259209445, 'alpha': 1.008895338639705, 'critic_loss': 125.21329529892058, 'actor_loss': 1.4675909406202432, 'time_step': 0.0315860051375169, 'td_error': 1.2645973747192087, 'init_value': -8.705984115600586, 'ave_value': -6.963769374291102} step=169
2022-04-17 15:03.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.24 [info     ] CQL_20220417150313: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.00036800965754943486, 'time_algorithm_update': 0.030665091508944357, 'temp_loss': 4.9252915918474365, 'temp': 0.9875622903101543, 'alpha_loss': -18.181829915244197, 'alpha': 1.0258168073800893, 'critic_loss': 95.57009332419852, 'actor_loss': 4.853064425598235, 'time_step': 0.031109460006804156, 'td_error': 1.3702819554347894, 'init_value': -11.042887687683105, 'ave_value': -9.262725951070184} step=338
2022-04-17 15:03.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.30 [info     ] CQL_20220417150313: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00037986428074582795, 'time_algorithm_update': 0.030099208538348857, 'temp_loss': 4.931258345496725, 'temp': 0.9795232796104703, 'alpha_loss': -18.323428012915617, 'alpha': 1.0426955018523176, 'critic_loss': 134.9539404423279, 'actor_loss': 6.777183572216146, 'time_step': 0.03055996725545127, 'td_error': 1.4325966380353075, 'init_value': -13.445391654968262, 'ave_value': -11.046797862525459} step=507
2022-04-17 15:03.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.35 [info     ] CQL_20220417150313: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.0003565528689051521, 'time_algorithm_update': 0.02927630046415611, 'temp_loss': 4.902130098737908, 'temp': 0.9716671994451941, 'alpha_loss': -18.586695281711556, 'alpha': 1.0602885999623135, 'critic_loss': 190.31820579393374, 'actor_loss': 8.812665061837823, 'time_step': 0.029705888420872434, 'td_error': 1.4983531348079504, 'init_value': -15.51966381072998, 'ave_value': -13.1324400507652} step=676
2022-04-17 15:03.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.40 [info     ] CQL_20220417150313: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.00036998754422340165, 'time_algorithm_update': 0.030893778659888273, 'temp_loss': 4.867128335512602, 'temp': 0.9639546331569288, 'alpha_loss': -18.99179816387109, 'alpha': 1.0785201689195352, 'critic_loss': 259.7380140857584, 'actor_loss': 10.107742241853792, 'time_step': 0.03134234276043593, 'td_error': 1.7376140076903173, 'init_value': -17.154542922973633, 'ave_value': -14.628297218932762} step=845
2022-04-17 15:03.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.45 [info     ] CQL_20220417150313: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.00038205660306490387, 'time_algorithm_update': 0.030022285393709262, 'temp_loss': 4.829853658845439, 'temp': 0.9563615505511944, 'alpha_loss': -19.34399906135875, 'alpha': 1.0973277804414197, 'critic_loss': 345.7749492938702, 'actor_loss': 10.94888741871309, 'time_step': 0.030481243980001415, 'td_error': 1.6536866113128958, 'init_value': -17.557754516601562, 'ave_value': -14.963156219301997} step=1014
2022-04-17 15:03.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150313/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.0000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:03.46 [info     ] FQE_20220417150346: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016991813461501878, 'time_algorithm_update': 0.00424099587774896, 'loss': 0.007334256336068759, 'time_step': 0.004493487345707881, 'init_value': 0.04128097742795944, 'ave_value': 0.07938074731679114, 'soft_opc': nan} step=77




2022-04-17 15:03.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.46 [info     ] FQE_20220417150346: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001660477031360973, 'time_algorithm_update': 0.00413824056650137, 'loss': 0.004827198932286014, 'time_step': 0.004375055238798067, 'init_value': -0.052446842193603516, 'ave_value': 0.020816852704428873, 'soft_opc': nan} step=154




2022-04-17 15:03.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.47 [info     ] FQE_20220417150346: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015702185692725244, 'time_algorithm_update': 0.004040083327850738, 'loss': 0.003487707041627304, 'time_step': 0.0042809418269566125, 'init_value': -0.12571214139461517, 'ave_value': -0.01775031402824564, 'soft_opc': nan} step=231




2022-04-17 15:03.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.47 [info     ] FQE_20220417150346: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00016390193592418325, 'time_algorithm_update': 0.004070727856128247, 'loss': 0.0028923741517612686, 'time_step': 0.004318076294737977, 'init_value': -0.15805266797542572, 'ave_value': -0.04001702174275845, 'soft_opc': nan} step=308




2022-04-17 15:03.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.48 [info     ] FQE_20220417150346: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00017052501827091366, 'time_algorithm_update': 0.004302752482426631, 'loss': 0.002657195116248127, 'time_step': 0.004574877875191825, 'init_value': -0.187529057264328, 'ave_value': -0.037128624716891094, 'soft_opc': nan} step=385




2022-04-17 15:03.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.48 [info     ] FQE_20220417150346: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001832386115928749, 'time_algorithm_update': 0.004596316969239867, 'loss': 0.002596314624556667, 'time_step': 0.0048772855238481, 'init_value': -0.21301184594631195, 'ave_value': -0.036748165303321034, 'soft_opc': nan} step=462




2022-04-17 15:03.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.48 [info     ] FQE_20220417150346: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.000162700553993126, 'time_algorithm_update': 0.0040393278196260525, 'loss': 0.0024267710176865, 'time_step': 0.00426846355586857, 'init_value': -0.25967422127723694, 'ave_value': -0.05322334132249559, 'soft_opc': nan} step=539




2022-04-17 15:03.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.49 [info     ] FQE_20220417150346: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015887966403713475, 'time_algorithm_update': 0.004015996858671114, 'loss': 0.0022726519768090023, 'time_step': 0.0042541553447772934, 'init_value': -0.3275204002857208, 'ave_value': -0.09440307971593496, 'soft_opc': nan} step=616




2022-04-17 15:03.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.49 [info     ] FQE_20220417150346: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017177903807008422, 'time_algorithm_update': 0.004369178375640473, 'loss': 0.0021637489493065453, 'time_step': 0.004612699731603846, 'init_value': -0.33252573013305664, 'ave_value': -0.09171695412715544, 'soft_opc': nan} step=693




2022-04-17 15:03.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.50 [info     ] FQE_20220417150346: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016492682617980165, 'time_algorithm_update': 0.004256610746507521, 'loss': 0.0021390170660232763, 'time_step': 0.0044924005285485995, 'init_value': -0.4013225734233856, 'ave_value': -0.13125152753179406, 'soft_opc': nan} step=770




2022-04-17 15:03.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150346/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:03.50 [info     ] FQE_20220417150350: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00015844617571149553, 'time_algorithm_update': 0.003998412714376078, 'loss': 0.0012916141876031632, 'time_step': 0.0042300286231102885, 'init_value': 0.03659621626138687, 'ave_value': 0.04349964913431773, 'soft_opc': nan} step=77




2022-04-17 15:03.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.50 [info     ] FQE_20220417150350: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016873533075506037, 'time_algorithm_update': 0.004175000376515575, 'loss': 0.0007666991547342728, 'time_step': 0.004424621532489727, 'init_value': -0.008224607445299625, 'ave_value': 0.02006426905323793, 'soft_opc': nan} step=154




2022-04-17 15:03.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.51 [info     ] FQE_20220417150350: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.000165431530444653, 'time_algorithm_update': 0.004021217296649883, 'loss': 0.0006095083812538946, 'time_step': 0.004254687916148792, 'init_value': -0.05718667060136795, 'ave_value': -0.014097970297039897, 'soft_opc': nan} step=231




2022-04-17 15:03.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.51 [info     ] FQE_20220417150350: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017737723016119622, 'time_algorithm_update': 0.00447061774018523, 'loss': 0.0005131165316334454, 'time_step': 0.004737900449083997, 'init_value': -0.07311463356018066, 'ave_value': -0.01889953967068462, 'soft_opc': nan} step=308




2022-04-17 15:03.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.52 [info     ] FQE_20220417150350: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.0001867127108883548, 'time_algorithm_update': 0.0046509674617222375, 'loss': 0.0004962104107721939, 'time_step': 0.004941206473808784, 'init_value': -0.09239441156387329, 'ave_value': -0.03126905438173479, 'soft_opc': nan} step=385




2022-04-17 15:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.52 [info     ] FQE_20220417150350: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001674534438492416, 'time_algorithm_update': 0.004382895184801771, 'loss': 0.0005254537895603058, 'time_step': 0.004639201350026317, 'init_value': -0.1385430097579956, 'ave_value': -0.0659387406649756, 'soft_opc': nan} step=462




2022-04-17 15:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.52 [info     ] FQE_20220417150350: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00017613869208794135, 'time_algorithm_update': 0.0043230180616502635, 'loss': 0.0004598905529687786, 'time_step': 0.004593263972889294, 'init_value': -0.16443464159965515, 'ave_value': -0.08268021972417865, 'soft_opc': nan} step=539




2022-04-17 15:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.53 [info     ] FQE_20220417150350: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015876200292017553, 'time_algorithm_update': 0.003911300138993697, 'loss': 0.00040879624392293004, 'time_step': 0.004150406106725916, 'init_value': -0.19819988310337067, 'ave_value': -0.104402112562696, 'soft_opc': nan} step=616




2022-04-17 15:03.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.53 [info     ] FQE_20220417150350: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00016739461329076197, 'time_algorithm_update': 0.004353464423836052, 'loss': 0.00035754244843748845, 'time_step': 0.004602342456966252, 'init_value': -0.20358368754386902, 'ave_value': -0.10565744653535034, 'soft_opc': nan} step=693




2022-04-17 15:03.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:03.54 [info     ] FQE_20220417150350: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00016395147744711344, 'time_algorithm_update': 0.004383734294346401, 'loss': 0.00040306101162697176, 'time_step': 0.004625757019241135, 'init_value': -0.23845739662647247, 'ave_value': -0.12796161264214698, 'soft_opc': nan} step=770




2022-04-17 15:03.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150350/model_770.pt
search iteration:  19
using hyper params:  [0.005398049443405106, 0.0099702392714585, 1.454262116172138e-05, 7]
2022-04-17 15:03.54 [debug    ] RoundIterator is selected.
2022-04-17 15:03.54 [info     ] Directory is created at d3rlpy_logs/CQL_20220417150354
2022-04-17 15:03.54 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 15:03.54 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 15:03.54 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417150354/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.005398049443405106, 'actor_optim_factory': {'optim_cls

Epoch 1/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:03.59 [info     ] CQL_20220417150354: epoch=1 step=169 epoch=1 metrics={'time_sample_batch': 0.00036835670471191406, 'time_algorithm_update': 0.02957264911493606, 'temp_loss': 4.822600030334743, 'temp': 0.9987380233036696, 'alpha_loss': -20.614555302456285, 'alpha': 1.0088900553404228, 'critic_loss': 111.42990220651119, 'actor_loss': 2.8832984545527127, 'time_step': 0.030014836576563366, 'td_error': 1.0886419728684258, 'init_value': -8.650524139404297, 'ave_value': -7.417250721390183} step=169
2022-04-17 15:03.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_169.pt


Epoch 2/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:04.04 [info     ] CQL_20220417150354: epoch=2 step=338 epoch=2 metrics={'time_sample_batch': 0.000379620219123434, 'time_algorithm_update': 0.030713920762553016, 'temp_loss': 5.019139351929433, 'temp': 0.9962273381165498, 'alpha_loss': -18.169054640820747, 'alpha': 1.0249631157993564, 'critic_loss': 104.30031080076681, 'actor_loss': 5.057378138310811, 'time_step': 0.031173037354057357, 'td_error': 1.5285337777649222, 'init_value': -11.238370895385742, 'ave_value': -9.723971272417018} step=338
2022-04-17 15:04.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_338.pt


Epoch 3/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:04.10 [info     ] CQL_20220417150354: epoch=3 step=507 epoch=3 metrics={'time_sample_batch': 0.00036642396238428603, 'time_algorithm_update': 0.030435018990872174, 'temp_loss': 5.010975377799491, 'temp': 0.9937384431884133, 'alpha_loss': -18.271977892994176, 'alpha': 1.041266734783466, 'critic_loss': 156.98795359233426, 'actor_loss': 6.591647966373602, 'time_step': 0.03088184221256414, 'td_error': 1.3829245112622313, 'init_value': -12.244380950927734, 'ave_value': -10.447564411506997} step=507
2022-04-17 15:04.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_507.pt


Epoch 4/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:04.15 [info     ] CQL_20220417150354: epoch=4 step=676 epoch=4 metrics={'time_sample_batch': 0.00036929909294173565, 'time_algorithm_update': 0.030813743376872947, 'temp_loss': 5.002173420945568, 'temp': 0.9912781048808578, 'alpha_loss': -18.602400661220212, 'alpha': 1.0584762978130544, 'critic_loss': 234.2917937329535, 'actor_loss': 7.4529318104128865, 'time_step': 0.03126404130247218, 'td_error': 1.3937641262611495, 'init_value': -12.613737106323242, 'ave_value': -10.863139529915543} step=676
2022-04-17 15:04.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_676.pt


Epoch 5/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:04.21 [info     ] CQL_20220417150354: epoch=5 step=845 epoch=5 metrics={'time_sample_batch': 0.000370533508661936, 'time_algorithm_update': 0.03075774181523972, 'temp_loss': 4.989770017432038, 'temp': 0.9888350529783576, 'alpha_loss': -18.91801240458291, 'alpha': 1.0763956745700722, 'critic_loss': 337.145497519589, 'actor_loss': 7.496662946847769, 'time_step': 0.031206297451222435, 'td_error': 1.2275913357555612, 'init_value': -11.650822639465332, 'ave_value': -10.221067493026322} step=845
2022-04-17 15:04.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_845.pt


Epoch 6/6:   0%|          | 0/169 [00:00<?, ?it/s]

2022-04-17 15:04.26 [info     ] CQL_20220417150354: epoch=6 step=1014 epoch=6 metrics={'time_sample_batch': 0.0003656085426285422, 'time_algorithm_update': 0.030268619751789162, 'temp_loss': 4.979379555177406, 'temp': 0.9864049663205119, 'alpha_loss': -19.154114424124273, 'alpha': 1.0948908977959988, 'critic_loss': 470.8638352614183, 'actor_loss': 6.5462380640605495, 'time_step': 0.030717466004501433, 'td_error': 1.1313483291331887, 'init_value': -9.410684585571289, 'ave_value': -8.493186917519784} step=1014
2022-04-17 15:04.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417150354/model_1014.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -1.85999953e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.58249611e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 101 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  8.00004692e-04
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.83681292e-02  6.00000000

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:04.26 [info     ] FQE_20220417150426: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.00016218036800235897, 'time_algorithm_update': 0.00408962175443575, 'loss': 0.009544787486990938, 'time_step': 0.0043247365332269045, 'init_value': -0.42578279972076416, 'ave_value': -0.3953517524374498, 'soft_opc': nan} step=77




2022-04-17 15:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.27 [info     ] FQE_20220417150426: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.0001795322864086597, 'time_algorithm_update': 0.004455142206959911, 'loss': 0.005962328474315537, 'time_step': 0.004741619159648945, 'init_value': -0.5132035613059998, 'ave_value': -0.43730156565974426, 'soft_opc': nan} step=154




2022-04-17 15:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.27 [info     ] FQE_20220417150426: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00016377808211685775, 'time_algorithm_update': 0.004053666994169161, 'loss': 0.004703612928581122, 'time_step': 0.004294222051447088, 'init_value': -0.5638577938079834, 'ave_value': -0.465551059176256, 'soft_opc': nan} step=231




2022-04-17 15:04.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.28 [info     ] FQE_20220417150426: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00017405794812487319, 'time_algorithm_update': 0.00448076446335037, 'loss': 0.004052289035896976, 'time_step': 0.004747319531131101, 'init_value': -0.606212854385376, 'ave_value': -0.5058157279684737, 'soft_opc': nan} step=308




2022-04-17 15:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.28 [info     ] FQE_20220417150426: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00016793337735262784, 'time_algorithm_update': 0.00424504280090332, 'loss': 0.0038978689782587545, 'time_step': 0.004510990985028156, 'init_value': -0.5724453926086426, 'ave_value': -0.45075738048097036, 'soft_opc': nan} step=385




2022-04-17 15:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.29 [info     ] FQE_20220417150426: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001699305199957513, 'time_algorithm_update': 0.004476061115017185, 'loss': 0.003844310875752239, 'time_step': 0.004722731454031808, 'init_value': -0.6244493722915649, 'ave_value': -0.47552400293281755, 'soft_opc': nan} step=462




2022-04-17 15:04.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.29 [info     ] FQE_20220417150426: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00015455097347110897, 'time_algorithm_update': 0.003988321725424234, 'loss': 0.003518457436788972, 'time_step': 0.004223386962692459, 'init_value': -0.6575307846069336, 'ave_value': -0.49375180390638274, 'soft_opc': nan} step=539




2022-04-17 15:04.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.29 [info     ] FQE_20220417150426: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00015875271388462612, 'time_algorithm_update': 0.003917415420730393, 'loss': 0.003200376094863206, 'time_step': 0.004154285827240386, 'init_value': -0.6927079558372498, 'ave_value': -0.5179897573550005, 'soft_opc': nan} step=616




2022-04-17 15:04.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.30 [info     ] FQE_20220417150426: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00018949632520799513, 'time_algorithm_update': 0.004446379550091632, 'loss': 0.0029825739390156292, 'time_step': 0.0047468457903180805, 'init_value': -0.6453095078468323, 'ave_value': -0.48294516230622925, 'soft_opc': nan} step=693




2022-04-17 15:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.30 [info     ] FQE_20220417150426: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00015787954454298144, 'time_algorithm_update': 0.003994666136704482, 'loss': 0.003315562606099751, 'time_step': 0.0042401846353109784, 'init_value': -0.6818684935569763, 'ave_value': -0.498951595414195, 'soft_opc': nan} step=770




2022-04-17 15:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150426/model_770.pt
start
[ 0.00000000e+00  7.95731469e+08 -2.55189108e-01  2.54000047e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.30457556e-01 -3.26640595e-01  6.00000000e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.22789108e-01 -1.71999953e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.18067885e-01 -4.36092621e-01  6.00000000e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89891077e-02  1.38000047e-02
  1.55999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.13215135e-01  3.00794900e-01]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39310892e-01 -5.61999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.81283783e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469

Epoch 1/10:   0%|          | 0/77 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 15:04.31 [info     ] FQE_20220417150430: epoch=1 step=77 epoch=1 metrics={'time_sample_batch': 0.0001735439548244724, 'time_algorithm_update': 0.004469555693787414, 'loss': 0.009568599760290477, 'time_step': 0.0047395291266503275, 'init_value': -0.06917528808116913, 'ave_value': -0.04907061515401143, 'soft_opc': nan} step=77




2022-04-17 15:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_77.pt


Epoch 2/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.31 [info     ] FQE_20220417150430: epoch=2 step=154 epoch=2 metrics={'time_sample_batch': 0.00016047428180645038, 'time_algorithm_update': 0.004124879837036133, 'loss': 0.006291883510791443, 'time_step': 0.004368323784369927, 'init_value': -0.1260036677122116, 'ave_value': -0.08305045559557642, 'soft_opc': nan} step=154




2022-04-17 15:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_154.pt


Epoch 3/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.31 [info     ] FQE_20220417150430: epoch=3 step=231 epoch=3 metrics={'time_sample_batch': 0.00015867840160023081, 'time_algorithm_update': 0.004002506082708185, 'loss': 0.004688452605714465, 'time_step': 0.0042246626569079114, 'init_value': -0.17117756605148315, 'ave_value': -0.10709517795324057, 'soft_opc': nan} step=231




2022-04-17 15:04.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_231.pt


Epoch 4/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.32 [info     ] FQE_20220417150430: epoch=4 step=308 epoch=4 metrics={'time_sample_batch': 0.00015306472778320312, 'time_algorithm_update': 0.003811356309172395, 'loss': 0.0036835047860883856, 'time_step': 0.0040285773091502, 'init_value': -0.1685561090707779, 'ave_value': -0.10659280282336178, 'soft_opc': nan} step=308




2022-04-17 15:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_308.pt


Epoch 5/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.32 [info     ] FQE_20220417150430: epoch=5 step=385 epoch=5 metrics={'time_sample_batch': 0.00015103662168824828, 'time_algorithm_update': 0.0037599105339545707, 'loss': 0.0033381595986883166, 'time_step': 0.00398822883506874, 'init_value': -0.17103523015975952, 'ave_value': -0.113415540952806, 'soft_opc': nan} step=385




2022-04-17 15:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_385.pt


Epoch 6/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.32 [info     ] FQE_20220417150430: epoch=6 step=462 epoch=6 metrics={'time_sample_batch': 0.0001664657097358208, 'time_algorithm_update': 0.004194634301321847, 'loss': 0.0029280298909583648, 'time_step': 0.0044509652373078586, 'init_value': -0.1954234093427658, 'ave_value': -0.1472984076989395, 'soft_opc': nan} step=462




2022-04-17 15:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_462.pt


Epoch 7/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.33 [info     ] FQE_20220417150430: epoch=7 step=539 epoch=7 metrics={'time_sample_batch': 0.00016702614821396866, 'time_algorithm_update': 0.004126994640796216, 'loss': 0.0026457990132659286, 'time_step': 0.004373377019708807, 'init_value': -0.16500519216060638, 'ave_value': -0.13200020409180774, 'soft_opc': nan} step=539




2022-04-17 15:04.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_539.pt


Epoch 8/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.33 [info     ] FQE_20220417150430: epoch=8 step=616 epoch=8 metrics={'time_sample_batch': 0.00014556537974964488, 'time_algorithm_update': 0.003631297644082602, 'loss': 0.0025231461418647465, 'time_step': 0.003851509713507318, 'init_value': -0.19049590826034546, 'ave_value': -0.17517887367657176, 'soft_opc': nan} step=616




2022-04-17 15:04.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_616.pt


Epoch 9/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.34 [info     ] FQE_20220417150430: epoch=9 step=693 epoch=9 metrics={'time_sample_batch': 0.00017098018101283481, 'time_algorithm_update': 0.004550868814641779, 'loss': 0.0021742461566728625, 'time_step': 0.004793848310198102, 'init_value': -0.17744670808315277, 'ave_value': -0.16381982802853895, 'soft_opc': nan} step=693




2022-04-17 15:04.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_693.pt


Epoch 10/10:   0%|          | 0/77 [00:00<?, ?it/s]



2022-04-17 15:04.34 [info     ] FQE_20220417150430: epoch=10 step=770 epoch=10 metrics={'time_sample_batch': 0.00017018751664595172, 'time_algorithm_update': 0.004591502152480088, 'loss': 0.0023497399789365854, 'time_step': 0.004828338499193068, 'init_value': -0.1851753443479538, 'ave_value': -0.18074607690313815, 'soft_opc': nan} step=770




2022-04-17 15:04.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417150430/model_770.pt


## Reading hyper params from file

In [34]:
with open("hyperparams_cql.pkl", "rb") as f:
    data = pkl.load(f)

print(data)

[0.00469557651656814, 0.003304999882985713, 4.52538497359818e-05, 7]
